Diffstat (limited to 'lib')
566 files changed, 32107 insertions, 18511 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 503fbbdab8d67..1f2528fa560f2 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -65,10 +65,127 @@ void AliasAnalysis::copyValue(Value *From, Value *To) { } AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { - // FIXME: we can do better. +AliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { + // Don't assert AA because BasicAA calls us in order to make use of the + // logic here. + + ModRefBehavior MRB = getModRefBehavior(CS); + if (MRB == DoesNotAccessMemory) + return NoModRef; + + ModRefResult Mask = ModRef; + if (MRB == OnlyReadsMemory) + Mask = Ref; + else if (MRB == AliasAnalysis::AccessesArguments) { + bool doesAlias = false; + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (!isNoAlias(*AI, ~0U, P, Size)) { + doesAlias = true; + break; + } + + if (!doesAlias) + return NoModRef; + } + + // If P points to a constant memory location, the call definitely could not + // modify the memory location. + if ((Mask & Mod) && pointsToConstantMemory(P)) + Mask = ModRefResult(Mask & ~Mod); + + // If this is BasicAA, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. + return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask); +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + // Don't assert AA because BasicAA calls us in order to make use of the + // logic here. + + // If CS1 or CS2 are readnone, they don't interact. + ModRefBehavior CS1B = getModRefBehavior(CS1); + if (CS1B == DoesNotAccessMemory) return NoModRef; + + ModRefBehavior CS2B = getModRefBehavior(CS2); + if (CS2B == DoesNotAccessMemory) return NoModRef; + + // If they both only read from memory, there is no dependence. + if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory) + return NoModRef; + + AliasAnalysis::ModRefResult Mask = ModRef; + + // If CS1 only reads memory, the only dependence on CS2 can be + // from CS1 reading memory written by CS2. + if (CS1B == OnlyReadsMemory) + Mask = ModRefResult(Mask & Ref); + + // If CS2 only accesses memory through arguments, accumulate the mod/ref + // information from CS1's references to the memory referenced by + // CS2's arguments. + if (CS2B == AccessesArguments) { + AliasAnalysis::ModRefResult R = NoModRef; + for (ImmutableCallSite::arg_iterator + I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { + R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask); + if (R == Mask) + break; + } + return R; + } + + // If CS1 only accesses memory through arguments, check if CS2 references + // any of the memory referenced by CS1's arguments. If not, return NoModRef. + if (CS1B == AccessesArguments) { + AliasAnalysis::ModRefResult R = NoModRef; + for (ImmutableCallSite::arg_iterator + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) + if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) { + R = Mask; + break; + } + if (R == NoModRef) + return R; + } + + // If this is BasicAA, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // Don't assert AA because BasicAA calls us in order to make use of the + // logic here. + + ModRefBehavior Min = UnknownModRefBehavior; + + // Call back into the alias analysis with the other form of getModRefBehavior + // to see if it can give a better response. + if (const Function *F = CS.getCalledFunction()) + Min = getModRefBehavior(F); + + // If this is BasicAA, don't forward. + if (!AA) return Min; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any result we've managed to compute. + return std::min(AA->getModRefBehavior(CS), Min); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(const Function *F) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->getModRefInfo(CS1, CS2); + return AA->getModRefBehavior(F); } @@ -77,87 +194,63 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { //===----------------------------------------------------------------------===// AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) { - return alias(L->getOperand(0), getTypeStoreSize(L->getType()), - P, Size) ? Ref : NoModRef; +AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) { + // Be conservative in the face of volatile. + if (L->isVolatile()) + return ModRef; + + // If the load address doesn't alias the given address, it doesn't read + // or write the specified memory. + if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size)) + return NoModRef; + + // Otherwise, a load just reads. + return Ref; } AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) { - // If the stored address cannot alias the pointer in question, then the - // pointer cannot be modified by the store. +AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) { + // Be conservative in the face of volatile. + if (S->isVolatile()) + return ModRef; + + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. if (!alias(S->getOperand(1), getTypeStoreSize(S->getOperand(0)->getType()), P, Size)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have been // modified by this store. - return pointsToConstantMemory(P) ? NoModRef : Mod; -} - -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(CallSite CS, - std::vector<PointerAccessInfo> *Info) { - if (CS.doesNotAccessMemory()) - // Can't do better than this. - return DoesNotAccessMemory; - ModRefBehavior MRB = getModRefBehavior(CS.getCalledFunction(), Info); - if (MRB != DoesNotAccessMemory && CS.onlyReadsMemory()) - return OnlyReadsMemory; - return MRB; -} - -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(Function *F, - std::vector<PointerAccessInfo> *Info) { - if (F) { - if (F->doesNotAccessMemory()) - // Can't do better than this. 
- return DoesNotAccessMemory; - if (F->onlyReadsMemory()) - return OnlyReadsMemory; - if (unsigned id = F->getIntrinsicID()) - return getModRefBehavior(id); - } - return UnknownModRefBehavior; -} + if (pointsToConstantMemory(P)) + return NoModRef; -AliasAnalysis::ModRefBehavior AliasAnalysis::getModRefBehavior(unsigned iid) { -#define GET_INTRINSIC_MODREF_BEHAVIOR -#include "llvm/Intrinsics.gen" -#undef GET_INTRINSIC_MODREF_BEHAVIOR + // Otherwise, a store just writes. + return Mod; } AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { - ModRefBehavior MRB = getModRefBehavior(CS); - if (MRB == DoesNotAccessMemory) +AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) { + // If the va_arg address cannot alias the pointer in question, then the + // specified memory cannot be accessed by the va_arg. + if (!alias(V->getOperand(0), UnknownSize, P, Size)) return NoModRef; - - ModRefResult Mask = ModRef; - if (MRB == OnlyReadsMemory) - Mask = Ref; - else if (MRB == AliasAnalysis::AccessesArguments) { - bool doesAlias = false; - for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) - if (!isNoAlias(*AI, ~0U, P, Size)) { - doesAlias = true; - break; - } - if (!doesAlias) - return NoModRef; - } + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this va_arg. + if (pointsToConstantMemory(P)) + return NoModRef; - if (!AA) return Mask; + // Otherwise, a va_arg reads and writes. + return ModRef; +} - // If P points to a constant memory location, the call definitely could not - // modify the memory location. - if ((Mask & Mod) && AA->pointsToConstantMemory(P)) - Mask = ModRefResult(Mask & ~Mod); - return ModRefResult(Mask & AA->getModRefInfo(CS, P, Size)); +AliasAnalysis::ModRefBehavior +AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) { +#define GET_INTRINSIC_MODREF_BEHAVIOR +#include "llvm/Intrinsics.gen" +#undef GET_INTRINSIC_MODREF_BEHAVIOR } // AliasAnalysis destructor: DO NOT move this to the header file for @@ -206,12 +299,12 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, const Value *Ptr, unsigned Size) { assert(I1.getParent() == I2.getParent() && "Instructions not in same basic block!"); - BasicBlock::iterator I = const_cast<Instruction*>(&I1); - BasicBlock::iterator E = const_cast<Instruction*>(&I2); + BasicBlock::const_iterator I = &I1; + BasicBlock::const_iterator E = &I2; ++E; // Convert from inclusive to exclusive range. for (; I != E; ++I) // Check every instruction in range - if (getModRefInfo(I, const_cast<Value*>(Ptr), Size) & Mod) + if (getModRefInfo(I, Ptr, Size) & Mod) return true; return false; } @@ -220,7 +313,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, /// function. 
bool llvm::isNoAliasCall(const Value *V) { if (isa<CallInst>(V) || isa<InvokeInst>(V)) - return CallSite(const_cast<Instruction*>(cast<Instruction>(V))) + return ImmutableCallSite(cast<Instruction>(V)) .paramHasAttr(0, Attribute::NoAlias); return false; } diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 1053955ea2339..b17804186a63a 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -34,7 +34,7 @@ namespace { Module *M; public: static char ID; // Class identification, replacement for typeinfo - AliasAnalysisCounter() : ModulePass(&ID) { + AliasAnalysisCounter() : ModulePass(ID) { No = May = Must = 0; NoMR = JustRef = JustMod = MR = 0; } @@ -87,8 +87,8 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } @@ -103,17 +103,18 @@ namespace { AliasResult alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size); - ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); - ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { return AliasAnalysis::getModRefInfo(CS1,CS2); } }; } char AliasAnalysisCounter::ID = 0; -static RegisterPass<AliasAnalysisCounter> -X("count-aa", "Count Alias Analysis Query Responses", false, true); -static RegisterAnalysisGroup<AliasAnalysis> Y(X); +INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa", + "Count Alias Analysis Query Responses", false, true, false); ModulePass *llvm::createAliasAnalysisCounterPass() { return new AliasAnalysisCounter(); @@ -146,7 +147,8 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size, } AliasAnalysis::ModRefResult -AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { +AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size); const char *MRString; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 37ee9fc22c9b9..ce363cbc7bbd6 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -50,7 +50,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - AAEval() : FunctionPass(&ID) {} + AAEval() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); @@ -74,8 +74,8 @@ namespace { } char AAEval::ID = 0; -static RegisterPass<AAEval> -X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true); +INITIALIZE_PASS(AAEval, "aa-eval", + "Exhaustive Alias Analysis Precision Evaluator", false, true); FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } @@ -107,6 +107,15 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, } } +static inline void +PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, + Module *M) { + if (P) { + errs() << " " << Msg << ": " << *CSA.getInstruction() + << " <-> " << 
*CSB.getInstruction() << '\n'; + } +} + static inline bool isInterestingPointer(Value *V) { return V->getType()->isPointerTy() && !isa<ConstantPointerNull>(V); @@ -126,8 +135,7 @@ bool AAEval::runOnFunction(Function &F) { if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); Instruction &Inst = *I; - CallSite CS = CallSite::get(&Inst); - if (CS) { + if (CallSite CS = cast<Value>(&Inst)) { Value *Callee = CS.getCalledValue(); // Skip actual functions for direct function calls. if (!isa<Function>(Callee) && isInterestingPointer(Callee)) @@ -137,6 +145,7 @@ bool AAEval::runOnFunction(Function &F) { AI != AE; ++AI) if (isInterestingPointer(*AI)) Pointers.insert(*AI); + CallSites.insert(CS); } else { // Consider all operands. for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); @@ -144,8 +153,6 @@ bool AAEval::runOnFunction(Function &F) { if (isInterestingPointer(*OI)) Pointers.insert(*OI); } - - if (CS.getInstruction()) CallSites.insert(CS); } if (PrintNoAlias || PrintMayAlias || PrintMustAlias || @@ -197,13 +204,13 @@ bool AAEval::runOnFunction(Function &F) { PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); ++NoModRef; break; case AliasAnalysis::Mod: - PrintModRefResults(" Mod", PrintMod, I, *V, F.getParent()); + PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent()); ++Mod; break; case AliasAnalysis::Ref: - PrintModRefResults(" Ref", PrintRef, I, *V, F.getParent()); + PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent()); ++Ref; break; case AliasAnalysis::ModRef: - PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent()); + PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); ++ModRef; break; default: errs() << "Unknown alias query result!\n"; @@ -211,6 +218,29 @@ bool AAEval::runOnFunction(Function &F) { } } + // Mod/ref alias analysis: compare all pairs of calls + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { + if (D == C) + continue; + switch (AA.getModRefInfo(*C, *D)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); + ++ModRef; break; + } + } + } + return false; } diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index bc2d9c55d1837..b9fe64608c01c 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -39,7 +39,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - AliasDebugger() : ModulePass(&ID) {} + AliasDebugger() : ModulePass(ID) {} bool runOnModule(Module &M) { InitializeAliasAnalysis(this); // set up super class @@ -83,8 +83,8 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. 
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } @@ -99,12 +99,14 @@ namespace { return AliasAnalysis::alias(V1, V1Size, V2, V2Size); } - ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) { + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); return AliasAnalysis::getModRefInfo(CS, P, Size); } - ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { return AliasAnalysis::getModRefInfo(CS1,CS2); } @@ -126,9 +128,8 @@ namespace { } char AliasDebugger::ID = 0; -static RegisterPass<AliasDebugger> -X("debug-aa", "AA use debugger", false, true); -static RegisterAnalysisGroup<AliasAnalysis> Y(X); +INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", + "AA use debugger", false, true, false); Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 02aff50d8a13a..e74543bb508aa 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -22,7 +22,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -35,6 +34,7 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { // Update the alias and access types of this set... AccessTy |= AS.AccessTy; AliasTy |= AS.AliasTy; + Volatile |= AS.Volatile; if (AliasTy == MustAlias) { // Check that these two merged sets really are must aliases. Since both @@ -111,11 +111,11 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, *PtrListEnd = &Entry; PtrListEnd = Entry.setPrevInList(PtrListEnd); assert(*PtrListEnd == 0 && "End of list is not null?"); - addRef(); // Entry points to alias set... + addRef(); // Entry points to alias set. } void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) { - CallSites.push_back(CS); + CallSites.push_back(CS.getInstruction()); AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS); if (Behavior == AliasAnalysis::DoesNotAccessMemory) @@ -140,7 +140,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size, assert(CallSites.empty() && "Illegal must alias set!"); // If this is a set of MustAliases, only check to see if the pointer aliases - // SOME value in the set... + // SOME value in the set. PointerRec *SomePtr = getSomePointer(); assert(SomePtr && "Empty must-alias set??"); return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size); @@ -155,8 +155,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size, // Check the call sites list and invoke list... 
if (!CallSites.empty()) { for (unsigned i = 0, e = CallSites.size(); i != e; ++i) - if (AA.getModRefInfo(CallSites[i], const_cast<Value*>(Ptr), Size) - != AliasAnalysis::NoModRef) + if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef) return true; } @@ -167,10 +166,11 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { if (AA.doesNotAccessMemory(CS)) return false; - for (unsigned i = 0, e = CallSites.size(); i != e; ++i) - if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef || - AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef) + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef || + AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef) return true; + } for (iterator I = begin(), E = end(); I != E; ++I) if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) != @@ -200,14 +200,15 @@ void AliasSetTracker::clear() { AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, unsigned Size) { AliasSet *FoundSet = 0; - for (iterator I = begin(), E = end(); I != E; ++I) - if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) { - if (FoundSet == 0) { // If this is the first alias set ptr can go into. - FoundSet = I; // Remember it. - } else { // Otherwise, we must merge the sets. - FoundSet->mergeSetIn(*I, *this); // Merge in contents. - } + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue; + + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. } + } return FoundSet; } @@ -226,15 +227,15 @@ bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const { AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) { AliasSet *FoundSet = 0; - for (iterator I = begin(), E = end(); I != E; ++I) - if (!I->Forward && I->aliasesCallSite(CS, AA)) { - if (FoundSet == 0) { // If this is the first alias set ptr can go into. - FoundSet = I; // Remember it. - } else if (!I->Forward) { // Otherwise, we must merge the sets. - FoundSet->mergeSetIn(*I, *this); // Merge in contents. - } - } - + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesCallSite(CS, AA)) + continue; + + if (FoundSet == 0) // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + else if (!I->Forward) // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } return FoundSet; } @@ -247,22 +248,24 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size, bool *New) { AliasSet::PointerRec &Entry = getEntryFor(Pointer); - // Check to see if the pointer is already known... + // Check to see if the pointer is already known. if (Entry.hasAliasSet()) { Entry.updateSize(Size); // Return the set! return *Entry.getAliasSet(*this)->getForwardedTarget(*this); - } else if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) { - // Add it to the alias set it aliases... + } + + if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) { + // Add it to the alias set it aliases. AS->addPointer(*this, Entry, Size); return *AS; - } else { - if (New) *New = true; - // Otherwise create a new alias set to hold the loaded pointer... 
- AliasSets.push_back(new AliasSet()); - AliasSets.back().addPointer(*this, Entry, Size); - return AliasSets.back(); } + + if (New) *New = true; + // Otherwise create a new alias set to hold the loaded pointer. + AliasSets.push_back(new AliasSet()); + AliasSets.back().addPointer(*this, Entry, Size); + return AliasSets.back(); } bool AliasSetTracker::add(Value *Ptr, unsigned Size) { @@ -305,28 +308,27 @@ bool AliasSetTracker::add(CallSite CS) { return true; // doesn't alias anything AliasSet *AS = findAliasSetForCallSite(CS); - if (!AS) { - AliasSets.push_back(new AliasSet()); - AS = &AliasSets.back(); - AS->addCallSite(CS, AA); - return true; - } else { + if (AS) { AS->addCallSite(CS, AA); return false; } + AliasSets.push_back(new AliasSet()); + AS = &AliasSets.back(); + AS->addCallSite(CS, AA); + return true; } bool AliasSetTracker::add(Instruction *I) { - // Dispatch to one of the other add methods... + // Dispatch to one of the other add methods. if (LoadInst *LI = dyn_cast<LoadInst>(I)) return add(LI); - else if (StoreInst *SI = dyn_cast<StoreInst>(I)) + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return add(SI); - else if (CallInst *CI = dyn_cast<CallInst>(I)) + if (CallInst *CI = dyn_cast<CallInst>(I)) return add(CI); - else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) return add(II); - else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) return add(VAAI); return true; } @@ -343,23 +345,23 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { // Loop over all of the alias sets in AST, adding the pointers contained // therein into the current alias sets. This can cause alias sets to be // merged together in the current AST. - for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) - if (!I->Forward) { // Ignore forwarding alias sets - AliasSet &AS = const_cast<AliasSet&>(*I); - - // If there are any call sites in the alias set, add them to this AST. - for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i) - add(AS.CallSites[i]); - - // Loop over all of the pointers in this alias set... - AliasSet::iterator I = AS.begin(), E = AS.end(); - bool X; - for (; I != E; ++I) { - AliasSet &NewAS = addPointer(I.getPointer(), I.getSize(), - (AliasSet::AccessType)AS.AccessTy, X); - if (AS.isVolatile()) NewAS.setVolatile(); - } + for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) { + if (I->Forward) continue; // Ignore forwarding alias sets + + AliasSet &AS = const_cast<AliasSet&>(*I); + + // If there are any call sites in the alias set, add them to this AST. + for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i) + add(AS.CallSites[i]); + + // Loop over all of the pointers in this alias set. + bool X; + for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(), + (AliasSet::AccessType)AS.AccessTy, X); + if (AS.isVolatile()) NewAS.setVolatile(); } + } } /// remove - Remove the specified (potentially non-empty) alias set from the @@ -435,11 +437,11 @@ bool AliasSetTracker::remove(Instruction *I) { // Dispatch to one of the other remove methods... 
if (LoadInst *LI = dyn_cast<LoadInst>(I)) return remove(LI); - else if (StoreInst *SI = dyn_cast<StoreInst>(I)) + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return remove(SI); - else if (CallInst *CI = dyn_cast<CallInst>(I)) + if (CallInst *CI = dyn_cast<CallInst>(I)) return remove(CI); - else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) return remove(VAAI); return true; } @@ -455,12 +457,17 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { AA.deleteValue(PtrVal); // If this is a call instruction, remove the callsite from the appropriate - // AliasSet. - CallSite CS = CallSite::get(PtrVal); - if (CS.getInstruction()) - if (!AA.doesNotAccessMemory(CS)) - if (AliasSet *AS = findAliasSetForCallSite(CS)) - AS->removeCallSite(CS); + // AliasSet (if present). + if (CallSite CS = PtrVal) { + if (!AA.doesNotAccessMemory(CS)) { + // Scan all the alias sets to see if this call site is contained. + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward) continue; + + I->removeCallSite(CS); + } + } + } // First, look up the PointerRec for this pointer. PointerMapType::iterator I = PointerMap.find(PtrVal); @@ -510,7 +517,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { //===----------------------------------------------------------------------===// void AliasSet::print(raw_ostream &OS) const { - OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] "; + OS << " AliasSet[" << (void*)this << ", " << RefCount << "] "; OS << (AliasTy == MustAlias ? "must" : "may") << " alias, "; switch (AccessTy) { case NoModRef: OS << "No access "; break; @@ -536,7 +543,7 @@ void AliasSet::print(raw_ostream &OS) const { OS << "\n " << CallSites.size() << " Call Sites: "; for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { if (i) OS << ", "; - WriteAsOperand(OS, CallSites[i].getCalledValue()); + WriteAsOperand(OS, CallSites[i]); } } OS << "\n"; @@ -580,7 +587,7 @@ namespace { AliasSetTracker *Tracker; public: static char ID; // Pass identification, replacement for typeid - AliasSetPrinter() : FunctionPass(&ID) {} + AliasSetPrinter() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -600,5 +607,5 @@ namespace { } char AliasSetPrinter::ID = 0; -static RegisterPass<AliasSetPrinter> -X("print-alias-sets", "Alias Set Printer", false, true); +INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets", + "Alias Set Printer", false, true); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 4f53a6d62559b..113c72b94dac5 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -18,6 +18,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -30,6 +31,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include <algorithm> using namespace llvm; @@ -137,8 +139,8 @@ namespace { /// struct NoAA : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo - NoAA() : ImmutablePass(&ID) {} - explicit NoAA(void *PID) : ImmutablePass(PID) { } + NoAA() : ImmutablePass(ID) {} + explicit NoAA(char &PID) : ImmutablePass(PID) { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { } @@ 
-152,16 +154,20 @@ namespace { return MayAlias; } - virtual void getArgumentAccesses(Function *F, CallSite CS, - std::vector<PointerAccessInfo> &Info) { - llvm_unreachable("This method may not be called on this function!"); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return UnknownModRefBehavior; + } + virtual ModRefBehavior getModRefBehavior(const Function *F) { + return UnknownModRefBehavior; } virtual bool pointsToConstantMemory(const Value *P) { return false; } - virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) { + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { return ModRef; } - virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { return ModRef; } @@ -169,11 +175,11 @@ namespace { virtual void copyValue(Value *From, Value *To) {} /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it should - /// override this to adjust the this pointer as needed for the specified pass - /// info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } @@ -182,15 +188,279 @@ namespace { // Register this pass... char NoAA::ID = 0; -static RegisterPass<NoAA> -U("no-aa", "No Alias Analysis (always returns 'may' alias)", true, true); - -// Declare that we implement the AliasAnalysis interface -static RegisterAnalysisGroup<AliasAnalysis> V(U); +INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", + "No Alias Analysis (always returns 'may' alias)", + true, true, false); ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } //===----------------------------------------------------------------------===// +// GetElementPtr Instruction Decomposition and Analysis +//===----------------------------------------------------------------------===// + +namespace { + enum ExtensionKind { + EK_NotExtended, + EK_SignExt, + EK_ZeroExt + }; + + struct VariableGEPIndex { + const Value *V; + ExtensionKind Extension; + int64_t Scale; + }; +} + + +/// GetLinearExpression - Analyze the specified value as a linear expression: +/// "A*V + B", where A and B are constant integers. Return the scale and offset +/// values as APInts and return V as a Value*, and return whether we looked +/// through any sign or zero extends. The incoming Value is known to have +/// IntegerType and it may already be sign or zero extended. +/// +/// Note that this looks through extends, so the high bits may not be +/// represented in the result. +static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, + ExtensionKind &Extension, + const TargetData &TD, unsigned Depth) { + assert(V->getType()->isIntegerTy() && "Not an integer value"); + + // Limit our recursion depth. + if (Depth == 6) { + Scale = 1; + Offset = 0; + return V; + } + + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + switch (BOp->getOpcode()) { + default: break; + case Instruction::Or: + // X|C == X+C if all the bits in C are unset in X. 
Otherwise we can't + // analyze it. + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD)) + break; + // FALL THROUGH. + case Instruction::Add: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset += RHSC->getValue(); + return V; + case Instruction::Mul: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset *= RHSC->getValue(); + Scale *= RHSC->getValue(); + return V; + case Instruction::Shl: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset <<= RHSC->getValue().getLimitedValue(); + Scale <<= RHSC->getValue().getLimitedValue(); + return V; + } + } + } + + // Since GEP indices are sign extended anyway, we don't care about the high + // bits of a sign or zero extended value - just scales and offsets. The + // extensions have to be consistent though. + if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) || + (isa<ZExtInst>(V) && Extension != EK_SignExt)) { + Value *CastOp = cast<CastInst>(V)->getOperand(0); + unsigned OldWidth = Scale.getBitWidth(); + unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); + Scale.trunc(SmallWidth); + Offset.trunc(SmallWidth); + Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt; + + Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, + TD, Depth+1); + Scale.zext(OldWidth); + Offset.zext(OldWidth); + + return Result; + } + + Scale = 1; + Offset = 0; + return V; +} + +/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it +/// into a base pointer with a constant offset and a number of scaled symbolic +/// offsets. +/// +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in +/// the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As such, +/// the gep cannot necessarily be reconstructed from its decomposed form. +/// +/// When TargetData is around, this function is capable of analyzing everything +/// that Value::getUnderlyingObject() can look through. When not, it just looks +/// through pointer casts. +/// +static const Value * +DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl<VariableGEPIndex> &VarIndices, + const TargetData *TD) { + // Limit recursion depth to limit compile time in crazy cases. + unsigned MaxLookup = 6; + + BaseOffs = 0; + do { + // See if this is a bitcast or GEP. + const Operator *Op = dyn_cast<Operator>(V); + if (Op == 0) { + // The only non-operator case we can handle are GlobalAliases. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (!GA->mayBeOverridden()) { + V = GA->getAliasee(); + continue; + } + } + return V; + } + + if (Op->getOpcode() == Instruction::BitCast) { + V = Op->getOperand(0); + continue; + } + + const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); + if (GEPOp == 0) + return V; + + // Don't attempt to analyze GEPs over unsized objects. + if (!cast<PointerType>(GEPOp->getOperand(0)->getType()) + ->getElementType()->isSized()) + return V; + + // If we are lacking TargetData information, we can't compute the offsets of + // elements computed by GEPs. However, we can handle bitcast equivalent + // GEPs. + if (TD == 0) { + if (!GEPOp->hasAllZeroIndices()) + return V; + V = GEPOp->getOperand(0); + continue; + } + + // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp); + for (User::const_op_iterator I = GEPOp->op_begin()+1, + E = GEPOp->op_end(); I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); + if (FieldNo == 0) continue; + + BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); + continue; + } + + // For an array/pointer, add the element offset, explicitly scaled. + if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { + if (CIdx->isZero()) continue; + BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); + continue; + } + + uint64_t Scale = TD->getTypeAllocSize(*GTI); + ExtensionKind Extension = EK_NotExtended; + + // If the integer type is smaller than the pointer size, it is implicitly + // sign extended to pointer size. + unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth(); + if (TD->getPointerSizeInBits() > Width) + Extension = EK_SignExt; + + // Use GetLinearExpression to decompose the index into a C1*V+C2 form. + APInt IndexScale(Width, 0), IndexOffset(Width, 0); + Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, + *TD, 0); + + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. + // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. + BaseOffs += IndexOffset.getZExtValue()*Scale; + Scale *= IndexScale.getZExtValue(); + + + // If we already had an occurrence of this index variable, merge this + // scale into it. For example, we want to handle: + // A[x][x] -> x*16 + x*4 -> x*20 + // This also ensures that 'x' only appears in the index list once. + for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { + if (VarIndices[i].V == Index && + VarIndices[i].Extension == Extension) { + Scale += VarIndices[i].Scale; + VarIndices.erase(VarIndices.begin()+i); + break; + } + } + + // Make sure that we have a scale that makes sense for this target's + // pointer size. + if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { + Scale <<= ShiftBits; + Scale >>= ShiftBits; + } + + if (Scale) { + VariableGEPIndex Entry = {Index, Extension, Scale}; + VarIndices.push_back(Entry); + } + } + + // Analyze the base pointer next. + V = GEPOp->getOperand(0); + } while (--MaxLookup); + + // If the chain of expressions is too deep, just return early. + return V; +} + +/// GetIndexDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src) { + if (Src.empty()) return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].V; + ExtensionKind Extension = Src[i].Extension; + int64_t Scale = Src[i].Scale; + + // Find V in Dest. This is N^2, but pointer indices almost never have more + // than a few variable indexes. + for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (Dest[j].V != V || Dest[j].Extension != Extension) continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale) + Dest[j].Scale -= Scale; + else + Dest.erase(Dest.begin()+j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. + if (Scale) { + VariableGEPIndex Entry = { V, Extension, -Scale }; + Dest.push_back(Entry); + } + } +} + +//===----------------------------------------------------------------------===// // BasicAliasAnalysis Pass //===----------------------------------------------------------------------===// @@ -220,10 +490,10 @@ namespace { /// derives from the NoAA class. struct BasicAliasAnalysis : public NoAA { static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : NoAA(&ID) {} + BasicAliasAnalysis() : NoAA(ID) {} - AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { assert(Visited.empty() && "Visited must be cleared after use!"); assert(notDifferentParent(V1, V2) && "BasicAliasAnalysis doesn't support interprocedural queries."); @@ -232,19 +502,33 @@ namespace { return Alias; } - ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); - ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size); + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // The AliasAnalysis base class has some smarts, lets use them. + return AliasAnalysis::getModRefInfo(CS1, CS2); + } /// pointsToConstantMemory - Chase pointers until we find a (constant /// global) or not. - bool pointsToConstantMemory(const Value *P); + virtual bool pointsToConstantMemory(const Value *P); + + /// getModRefBehavior - Return the behavior when calling the given + /// call site. + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// getModRefBehavior - Return the behavior when calling the given function. + /// For use when the call site is not known. + virtual ModRefBehavior getModRefBehavior(const Function *F); /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it should - /// override this to adjust the this pointer as needed for the specified pass - /// info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } @@ -275,11 +559,9 @@ namespace { // Register this pass... char BasicAliasAnalysis::ID = 0; -static RegisterPass<BasicAliasAnalysis> -X("basicaa", "Basic Alias Analysis (default AA impl)", false, true); - -// Declare that we implement the AliasAnalysis interface -static RegisterAnalysisGroup<AliasAnalysis, true> Y(X); +INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa", + "Basic Alias Analysis (default AA impl)", + false, true, true); ImmutablePass *llvm::createBasicAliasAnalysisPass() { return new BasicAliasAnalysis(); @@ -295,16 +577,50 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) { // global to be marked constant in some modules and non-constant in others. // GV may even be a declaration, not a definition. 
return GV->isConstant(); - return false; + + return NoAA::pointsToConstantMemory(P); } +/// getModRefBehavior - Return the behavior when calling the given call site. +AliasAnalysis::ModRefBehavior +BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (CS.doesNotAccessMemory()) + // Can't do better than this. + return DoesNotAccessMemory; + + ModRefBehavior Min = UnknownModRefBehavior; + + // If the callsite knows it only reads memory, don't return worse + // than that. + if (CS.onlyReadsMemory()) + Min = OnlyReadsMemory; + + // The AliasAnalysis base class has some smarts, lets use them. + return std::min(AliasAnalysis::getModRefBehavior(CS), Min); +} + +/// getModRefBehavior - Return the behavior when calling the given function. +/// For use when the call site is not known. +AliasAnalysis::ModRefBehavior +BasicAliasAnalysis::getModRefBehavior(const Function *F) { + if (F->doesNotAccessMemory()) + // Can't do better than this. + return DoesNotAccessMemory; + if (F->onlyReadsMemory()) + return OnlyReadsMemory; + if (unsigned id = F->getIntrinsicID()) + return getIntrinsicModRefBehavior(id); + + return NoAA::getModRefBehavior(F); +} /// getModRefInfo - Check to see if the specified callsite can clobber the /// specified memory object. Since we only look at local properties of this /// function, we really can't say much about this query. We do, however, use /// simple "address taken" analysis on local objects. AliasAnalysis::ModRefResult -BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { +BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { assert(notDifferentParent(CS.getInstruction(), P) && "AliasAnalysis query involving multiple functions!"); @@ -316,7 +632,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { // the current function not to the current function, and a tail callee // may reference them. if (isa<AllocaInst>(Object)) - if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) + if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) if (CI->isTailCall()) return NoModRef; @@ -327,7 +643,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { isNonEscapingLocalObject(Object)) { bool PassedAsArg = false; unsigned ArgNo = 0; - for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture pointer arguments. if (!(*CI)->getType()->isPointerTy() || @@ -338,7 +654,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't // escape. - if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) { + if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) { PassedAsArg = true; break; } @@ -349,127 +665,76 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { } // Finally, handle specific knowledge of intrinsics. 
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); - if (II == 0) - return AliasAnalysis::getModRefInfo(CS, P, Size); - - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::memcpy: - case Intrinsic::memmove: { - unsigned Len = ~0U; - if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) - Len = LenCI->getZExtValue(); - Value *Dest = II->getArgOperand(0); - Value *Src = II->getArgOperand(1); - if (isNoAlias(Dest, Len, P, Size)) { - if (isNoAlias(Src, Len, P, Size)) - return NoModRef; - return Ref; - } - break; - } - case Intrinsic::memset: - // Since memset is 'accesses arguments' only, the AliasAnalysis base class - // will handle it for the variable length case. - if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) { - unsigned Len = LenCI->getZExtValue(); + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); + if (II != 0) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + unsigned Len = UnknownSize; + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) + Len = LenCI->getZExtValue(); Value *Dest = II->getArgOperand(0); - if (isNoAlias(Dest, Len, P, Size)) + Value *Src = II->getArgOperand(1); + if (isNoAlias(Dest, Len, P, Size)) { + if (isNoAlias(Src, Len, P, Size)) + return NoModRef; + return Ref; + } + break; + } + case Intrinsic::memset: + // Since memset is 'accesses arguments' only, the AliasAnalysis base class + // will handle it for the variable length case. + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) { + unsigned Len = LenCI->getZExtValue(); + Value *Dest = II->getArgOperand(0); + if (isNoAlias(Dest, Len, P, Size)) + return NoModRef; + } + break; + case Intrinsic::atomic_cmp_swap: + case Intrinsic::atomic_swap: + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: + if (TD) { + Value *Op1 = II->getArgOperand(0); + unsigned Op1Size = TD->getTypeStoreSize(Op1->getType()); + if (isNoAlias(Op1, Op1Size, P, Size)) + return NoModRef; + } + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: { + unsigned PtrSize = + cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); + if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size)) return NoModRef; + break; } - break; - case Intrinsic::atomic_cmp_swap: - case Intrinsic::atomic_swap: - case Intrinsic::atomic_load_add: - case Intrinsic::atomic_load_sub: - case Intrinsic::atomic_load_and: - case Intrinsic::atomic_load_nand: - case Intrinsic::atomic_load_or: - case Intrinsic::atomic_load_xor: - case Intrinsic::atomic_load_max: - case Intrinsic::atomic_load_min: - case Intrinsic::atomic_load_umax: - case Intrinsic::atomic_load_umin: - if (TD) { - Value *Op1 = II->getArgOperand(0); - unsigned Op1Size = TD->getTypeStoreSize(Op1->getType()); - if (isNoAlias(Op1, Op1Size, P, Size)) + case Intrinsic::invariant_end: { + unsigned PtrSize = + cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); + if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size)) return NoModRef; + break; + } } - break; - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: { - unsigned PtrSize = 
cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); - if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size)) - return NoModRef; - break; - } - case Intrinsic::invariant_end: { - unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); - if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size)) - return NoModRef; - break; - } - } // The AliasAnalysis base class has some smarts, lets use them. return AliasAnalysis::getModRefInfo(CS, P, Size); } -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { - // If CS1 or CS2 are readnone, they don't interact. - ModRefBehavior CS1B = AliasAnalysis::getModRefBehavior(CS1); - if (CS1B == DoesNotAccessMemory) return NoModRef; - - ModRefBehavior CS2B = AliasAnalysis::getModRefBehavior(CS2); - if (CS2B == DoesNotAccessMemory) return NoModRef; - - // If they both only read from memory, just return ref. - if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory) - return Ref; - - // Otherwise, fall back to NoAA (mod+ref). - return NoAA::getModRefInfo(CS1, CS2); -} - -/// GetIndiceDifference - Dest and Src are the variable indices from two -/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base -/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic -/// difference between the two pointers. -static void GetIndiceDifference( - SmallVectorImpl<std::pair<const Value*, int64_t> > &Dest, - const SmallVectorImpl<std::pair<const Value*, int64_t> > &Src) { - if (Src.empty()) return; - - for (unsigned i = 0, e = Src.size(); i != e; ++i) { - const Value *V = Src[i].first; - int64_t Scale = Src[i].second; - - // Find V in Dest. This is N^2, but pointer indices almost never have more - // than a few variable indexes. - for (unsigned j = 0, e = Dest.size(); j != e; ++j) { - if (Dest[j].first != V) continue; - - // If we found it, subtract off Scale V's from the entry in Dest. If it - // goes to zero, remove the entry. - if (Dest[j].second != Scale) - Dest[j].second -= Scale; - else - Dest.erase(Dest.begin()+j); - Scale = 0; - break; - } - - // If we didn't consume this entry, add it to the end of the Dest list. - if (Scale) - Dest.push_back(std::make_pair(V, -Scale)); - } -} - /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. We know that V1 is a GEP, but we don't know /// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(), @@ -488,13 +753,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, return MayAlias; int64_t GEP1BaseOffset; - SmallVector<std::pair<const Value*, int64_t>, 4> GEP1VariableIndices; + SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; // If we have two gep instructions with must-alias'ing base pointers, figure // out if the indexes to the GEP tell us anything about the derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, ~0U, UnderlyingV2, ~0U); + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, + UnderlyingV2, UnknownSize); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. 
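The GetLinearExpression / DecomposeGEPExpression machinery added above reduces every GEP index to the linear form A*V + B before any disambiguation is attempted. Below is a minimal standalone sketch of that decomposition, with plain int64_t arithmetic standing in for APInt, a toy expression tree standing in for llvm::Value, and the sign/zero-extension handling omitted; all three are simplifying assumptions for illustration, not the LLVM API.

#include <cstdint>
#include <cstdio>

struct Expr {
  enum Kind { Leaf, Add, Mul, Shl } K;
  const Expr *LHS;  // sub-expression; null for leaves
  int64_t RHS;      // constant operand of Add/Mul/Shl
};

// Returns the leaf V such that E computes Scale*V + Offset, mirroring the
// depth-limited recursion in GetLinearExpression.
static const Expr *getLinear(const Expr *E, int64_t &Scale, int64_t &Offset,
                             unsigned Depth = 0) {
  if (Depth == 6 || E->K == Expr::Leaf) {
    Scale = 1;
    Offset = 0;
    return E;
  }
  const Expr *V = getLinear(E->LHS, Scale, Offset, Depth + 1);
  switch (E->K) {
  case Expr::Add: Offset += E->RHS; break;                    // (S*V+O)+C
  case Expr::Mul: Offset *= E->RHS; Scale *= E->RHS; break;   // (S*V+O)*C
  case Expr::Shl: Offset <<= E->RHS; Scale <<= E->RHS; break; // (S*V+O)<<C
  default: break;
  }
  return V;
}

int main() {
  // (x << 2) + 12 decomposes to 4*x + 12, exactly the C1*V+C2 form the GEP
  // walker feeds into its scaled-index bookkeeping.
  Expr X  = {Expr::Leaf, nullptr, 0};
  Expr Sh = {Expr::Shl, &X, 2};
  Expr A  = {Expr::Add, &Sh, 12};
  int64_t Scale, Offset;
  getLinear(&A, Scale, Offset);
  std::printf("scale=%lld offset=%lld\n", (long long)Scale, (long long)Offset);
}

Past the depth cutoff the value is simply treated as opaque (Scale=1, Offset=0), which costs precision but never soundness, and that is the same trade the real routine makes.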
@@ -507,7 +773,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); int64_t GEP2BaseOffset; - SmallVector<std::pair<const Value*, int64_t>, 4> GEP2VariableIndices; + SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); @@ -523,7 +789,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // Subtract the GEP2 pointer from the GEP1 pointer to find out their // symbolic difference. GEP1BaseOffset -= GEP2BaseOffset; - GetIndiceDifference(GEP1VariableIndices, GEP2VariableIndices); + GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices); } else { // Check to see if these two pointers are related by the getelementptr @@ -531,10 +797,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // pointer, we know they cannot alias. // If both accesses are unknown size, we can't do anything useful here. - if (V1Size == ~0U && V2Size == ~0U) + if (V1Size == UnknownSize && V2Size == UnknownSize) return MayAlias; - AliasResult R = aliasCheck(UnderlyingV1, ~0U, V2, V2Size); + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values @@ -578,8 +844,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, // provides an offset of 4 bytes (assuming a <= 4 byte access). for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e && GEP1BaseOffset;++i) - if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].second) - GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].second; + if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale) + GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale; // If our known offset is bigger than the access size, we know we don't have // an alias. @@ -782,8 +1048,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. if (TD) - if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, *TD)) || - (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD))) + if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) || + (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD))) return NoAlias; // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the @@ -810,7 +1076,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) return aliasSelect(S1, V1Size, V2, V2Size); - return MayAlias; + return NoAA::alias(V1, V1Size, V2, V2Size); } // Make sure that anything that uses AliasAnalysis pulls in this file. 
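A pattern that recurs through the AliasAnalysis and BasicAliasAnalysis changes above is chain merging: each layer computes whatever mod/ref mask it can prove locally, then intersects it with the answer from the next analysis in the chain, as in the new "return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask);". The following is a minimal sketch of that rule, with a hypothetical two-link chain standing in for the real pass machinery.

#include <cstdio>

// Same bit encoding as AliasAnalysis::ModRefResult: Ref and Mod are
// independent bits, ModRef is both.
enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

struct Analysis {
  Analysis *Next;          // next analysis in the chain (null at the end)
  ModRefResult (*Local)(); // what this layer can prove on its own

  ModRefResult getModRefInfo() {
    ModRefResult Mask = Local();
    if (Mask == NoModRef) return NoModRef; // proved: no interaction at all
    if (!Next) return Mask;                // end of chain: best we know
    // A bit survives only if every layer in the chain still allows it.
    return ModRefResult(Next->getModRefInfo() & Mask);
  }
};

static ModRefResult onlyReads() { return Ref; }  // e.g. a readonly call
static ModRefResult unknown() { return ModRef; } // conservative fallback

int main() {
  Analysis Fallback = {nullptr, unknown};
  Analysis Front = {&Fallback, onlyReads};
  // Front proves "reads only"; Fallback knows nothing. The chain reports Ref.
  std::printf("%d\n", Front.getModRefInfo()); // prints 1 (Ref)
}

Because the merge is a bitwise AND, any layer that rules a bit out removes it for good; the "if (!AA) return Mask" checks in the real code are the base case of this recursion.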
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index e06704bd897ca..617a362062fc2 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -25,7 +25,7 @@ using namespace llvm; namespace { struct CFGViewer : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid - CFGViewer() : FunctionPass(&ID) {} + CFGViewer() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { F.viewCFG(); @@ -41,13 +41,12 @@ namespace { } char CFGViewer::ID = 0; -static RegisterPass<CFGViewer> -V0("view-cfg", "View CFG of function", false, true); +INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true); namespace { struct CFGOnlyViewer : public FunctionPass { static char ID; // Pass identifcation, replacement for typeid - CFGOnlyViewer() : FunctionPass(&ID) {} + CFGOnlyViewer() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { F.viewCFGOnly(); @@ -63,15 +62,14 @@ namespace { } char CFGOnlyViewer::ID = 0; -static RegisterPass<CFGOnlyViewer> -V1("view-cfg-only", - "View CFG of function (with no function bodies)", false, true); +INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only", + "View CFG of function (with no function bodies)", false, true); namespace { struct CFGPrinter : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGPrinter() : FunctionPass(&ID) {} - explicit CFGPrinter(void *pid) : FunctionPass(pid) {} + CFGPrinter() : FunctionPass(ID) {} + explicit CFGPrinter(char &pid) : FunctionPass(pid) {} virtual bool runOnFunction(Function &F) { std::string Filename = "cfg." + F.getNameStr() + ".dot"; @@ -97,14 +95,14 @@ namespace { } char CFGPrinter::ID = 0; -static RegisterPass<CFGPrinter> -P1("dot-cfg", "Print CFG of function to 'dot' file", false, true); +INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", + false, true); namespace { struct CFGOnlyPrinter : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGOnlyPrinter() : FunctionPass(&ID) {} - explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {} + CFGOnlyPrinter() : FunctionPass(ID) {} + explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {} virtual bool runOnFunction(Function &F) { std::string Filename = "cfg." + F.getNameStr() + ".dot"; errs() << "Writing '" << Filename << "'..."; @@ -128,9 +126,9 @@ namespace { } char CFGOnlyPrinter::ID = 0; -static RegisterPass<CFGOnlyPrinter> -P2("dot-cfg-only", - "Print CFG of function to 'dot' file (with no function bodies)", false, true); +INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", + "Print CFG of function to 'dot' file (with no function bodies)", + false, true); /// viewCFG - This function is meant for use from the debugger. 
You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index d9b670dea58d2..6a2ab681d1acf 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -38,12 +38,15 @@ add_llvm_library(LLVMAnalysis ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp ProfileVerifierPass.cpp + RegionInfo.cpp + RegionPrinter.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp ScalarEvolutionNormalization.cpp SparsePropagation.cpp Trace.cpp + TypeBasedAliasAnalysis.cpp ValueTracking.cpp ) diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 047825884ef35..90eae20858fb9 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -69,7 +69,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, switch (I->getOpcode()) { case Instruction::Call: case Instruction::Invoke: { - CallSite CS = CallSite::get(I); + CallSite CS(I); // Not captured if the callee is readonly, doesn't return a copy through // its return value and doesn't unwind (a readonly function can leak bits // by throwing an exception or not depending on the input value). diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 13d8f4de48248..0bf7967e83b13 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -778,9 +778,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ICmp: case Instruction::FCmp: assert(0 && "Invalid for compares"); case Instruction::Call: - if (Function *F = dyn_cast<Function>(Ops[CallInst::ArgOffset ? 0:NumOps-1])) + if (Function *F = dyn_cast<Function>(Ops[NumOps - 1])) if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops+CallInst::ArgOffset, NumOps-1); + return ConstantFoldCall(F, Ops, NumOps - 1); return 0; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index 3532b052dc558..0567750606104 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -40,7 +40,7 @@ namespace { void printVariableDeclaration(const Value *V); public: static char ID; // Pass identification - PrintDbgInfo() : FunctionPass(&ID), Out(outs()) {} + PrintDbgInfo() : FunctionPass(ID), Out(errs()) {} virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -48,8 +48,8 @@ namespace { } }; char PrintDbgInfo::ID = 0; - static RegisterPass<PrintDbgInfo> X("print-dbginfo", - "Print debug info in human readable form"); + INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", + "Print debug info in human readable form", false, false); } FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index c8d0d22ec2e10..5ca89c658df6f 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DebugInfo.h" -#include "llvm/Target/TargetMachine.h" // FIXME: LAYERING VIOLATION! 
#include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Intrinsics.h" @@ -22,6 +21,8 @@ #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/raw_ostream.h" @@ -32,7 +33,22 @@ using namespace llvm::dwarf; // DIDescriptor //===----------------------------------------------------------------------===// -StringRef +DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) { +} + +StringRef DIDescriptor::getStringField(unsigned Elt) const { if (DbgNode == 0) return StringRef(); @@ -60,7 +76,8 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { return DIDescriptor(); if (Elt < DbgNode->getNumOperands()) - return DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt))); + return + DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt))); return DIDescriptor(); } @@ -73,6 +90,15 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { return 0; } +Constant *DIDescriptor::getConstantField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt)); + return 0; +} + Function *DIDescriptor::getFunctionField(unsigned Elt) const { if (DbgNode == 0) return 0; @@ -109,6 +135,7 @@ bool DIDescriptor::isDerivedType() const { case dwarf::DW_TAG_restrict_type: case dwarf::DW_TAG_member: case dwarf::DW_TAG_inheritance: + case dwarf::DW_TAG_friend: return true; default: // CompositeTypes are currently modelled as DerivedTypes. @@ -161,7 +188,8 @@ bool DIDescriptor::isSubprogram() const { /// isGlobalVariable - Return true if the specified tag is legal for /// DIGlobalVariable. bool DIDescriptor::isGlobalVariable() const { - return DbgNode && getTag() == dwarf::DW_TAG_variable; + return DbgNode && (getTag() == dwarf::DW_TAG_variable || + getTag() == dwarf::DW_TAG_constant); } /// isGlobal - Return true if the specified tag is legal for DIGlobal. @@ -233,9 +261,8 @@ unsigned DIArray::getNumElements() const { } /// replaceAllUsesWith - Replace all uses of debug info referenced by -/// this descriptor. After this completes, the current debug info value -/// is erased. -void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) { +/// this descriptor. +void DIType::replaceAllUsesWith(DIDescriptor &D) { if (!DbgNode) return; @@ -249,7 +276,7 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) { const MDNode *DN = D; const Value *V = cast_or_null<Value>(DN); Node->replaceAllUsesWith(const_cast<Value*>(V)); - Node->destroy(); + MDNode::deleteTemporary(Node); } } @@ -277,6 +304,16 @@ bool DIType::Verify() const { return true; } +/// Verify - Verify that a basic type descriptor is well formed. +bool DIBasicType::Verify() const { + return isBasicType(); +} + +/// Verify - Verify that a derived type descriptor is well formed. +bool DIDerivedType::Verify() const { + return isDerivedType(); +} + /// Verify - Verify that a composite type descriptor is well formed. 
bool DICompositeType::Verify() const { if (!DbgNode) @@ -327,7 +364,7 @@ bool DIGlobalVariable::Verify() const { if (!Ty.Verify()) return false; - if (!getGlobal()) + if (!getGlobal() && !getConstant()) return false; return true; @@ -355,7 +392,7 @@ bool DIVariable::Verify() const { bool DILocation::Verify() const { if (!DbgNode) return false; - + return DbgNode->getNumOperands() == 4; } @@ -378,7 +415,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || Tag == dwarf::DW_TAG_restrict_type) { DIType BaseType = getTypeDerivedFrom(); - // If this type is not derived from any type then take conservative + // If this type is not derived from any type then take conservative // approach. if (!BaseType.isValid()) return getSizeInBits(); @@ -387,17 +424,17 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { else return BaseType.getSizeInBits(); } - + return getSizeInBits(); } -/// isInlinedFnArgument - Return trule if this variable provides debugging +/// isInlinedFnArgument - Return true if this variable provides debugging /// information for an inlined function arguments. bool DIVariable::isInlinedFnArgument(const Function *CurFn) { assert(CurFn && "Invalid function"); if (!getContext().isSubprogram()) return false; - // This variable is not inlined function argument if its scope + // This variable is not inlined function argument if its scope // does not describe current function. return !(DISubprogram(getContext()).describes(CurFn)); } @@ -416,7 +453,7 @@ bool DISubprogram::describes(const Function *F) { return false; } -unsigned DISubprogram::isOptimized() const { +unsigned DISubprogram::isOptimized() const { assert (DbgNode && "Invalid subprogram descriptor!"); if (DbgNode->getNumOperands() == 16) return getUnsignedField(15); @@ -426,7 +463,7 @@ unsigned DISubprogram::isOptimized() const { StringRef DIScope::getFilename() const { if (!DbgNode) return StringRef(); - if (isLexicalBlock()) + if (isLexicalBlock()) return DILexicalBlock(DbgNode).getFilename(); if (isSubprogram()) return DISubprogram(DbgNode).getFilename(); @@ -445,7 +482,7 @@ StringRef DIScope::getFilename() const { StringRef DIScope::getDirectory() const { if (!DbgNode) return StringRef(); - if (isLexicalBlock()) + if (isLexicalBlock()) return DILexicalBlock(DbgNode).getDirectory(); if (isSubprogram()) return DISubprogram(DbgNode).getDirectory(); @@ -899,7 +936,26 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag, ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), ContainingType }; - return DICompositeType(MDNode::get(VMContext, &Elts[0], 13)); + + MDNode *Node = MDNode::get(VMContext, &Elts[0], 13); + // Create a named metadata so that we do not lose this enum info. + if (Tag == dwarf::DW_TAG_enumeration_type) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); + NMD->addOperand(Node); + } + return DICompositeType(Node); +} + + +/// CreateTemporaryType - Create a temporary forward-declared type. +DIType DIFactory::CreateTemporaryType() { + // Give the temporary MDNode a tag. It doesn't matter what tag we + // use here as long as DIType accepts it. 
+ Value *Elts[] = { + GetTagConstant(DW_TAG_base_type) + }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + return DIType(Node); } @@ -915,8 +971,8 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, unsigned Flags, DIType DerivedFrom, DIArray Elements, - unsigned RuntimeLang) { - + unsigned RuntimeLang, + MDNode *ContainingType) { Value *Elts[] = { GetTagConstant(Tag), Context, @@ -929,9 +985,16 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), DerivedFrom, Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), + ContainingType }; - return DICompositeType(MDNode::get(VMContext, &Elts[0], 12)); + MDNode *Node = MDNode::get(VMContext, &Elts[0], 13); + // Create a named metadata so that we do not lose this enum info. + if (Tag == dwarf::DW_TAG_enumeration_type) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); + NMD->addOperand(Node); + } + return DICompositeType(Node); } @@ -980,8 +1043,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, } /// CreateSubprogramDefinition - Create new subprogram descriptor for the -/// given declaration. -DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) { +/// given declaration. +DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){ if (SPDeclaration.isDefinition()) return DISubprogram(SPDeclaration); @@ -1046,6 +1109,38 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, return DIGlobalVariable(Node); } +/// CreateGlobalVariable - Create a new descriptor for the specified constant. +DIGlobalVariable +DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, + DIFile F, + unsigned LineNo, DIType Ty,bool isLocalToUnit, + bool isDefinition, llvm::Constant *Val) { + Value *Elts[] = { + GetTagConstant(dwarf::DW_TAG_variable), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context, + MDString::get(VMContext, Name), + MDString::get(VMContext, DisplayName), + MDString::get(VMContext, LinkageName), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Ty, + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + Val + }; + + Value *const *Vs = &Elts[0]; + MDNode *Node = MDNode::get(VMContext,Vs, 12); + + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(Node); + + return DIGlobalVariable(Node); +} /// CreateVariable - Create a new descriptor for the specified variable. 
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, @@ -1073,10 +1168,10 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, char One = '\1'; if (FName.startswith(StringRef(&One, 1))) FName = FName.substr(1); - NamedMDNode *FnLocals = M.getNamedMetadata(Twine("llvm.dbg.lv.", FName)); - if (!FnLocals) - FnLocals = NamedMDNode::Create(VMContext, Twine("llvm.dbg.lv.", FName), - NULL, 0, &M); + + SmallString<32> Out; + NamedMDNode *FnLocals = + M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out)); FnLocals->addOperand(Node); } return DIVariable(Node); @@ -1089,7 +1184,7 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, const std::string &Name, DIFile F, unsigned LineNo, - DIType Ty, + DIType Ty, SmallVector<Value *, 9> &addr) { SmallVector<Value *, 9> Elts; Elts.push_back(GetTagConstant(Tag)); @@ -1107,14 +1202,19 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, /// CreateLexicalBlock - This creates a descriptor for a lexical block with the /// specified parent VMContext. DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context, - unsigned LineNo, unsigned Col) { + DIFile F, unsigned LineNo, + unsigned Col) { + // Defeat MDNode uniquing for lexical blocks. + static unsigned int unique_id = 0; Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_lexical_block), Context, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt32Ty(VMContext), Col) + ConstantInt::get(Type::getInt32Ty(VMContext), Col), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4)); + return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6)); } /// CreateNameSpace - This creates a new descriptor for a namespace @@ -1174,7 +1274,7 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, // If this block already has a terminator then insert this intrinsic // before the terminator.
- if (TerminatorInst *T = InsertAtEnd->getTerminator()) + if (TerminatorInst *T = InsertAtEnd->getTerminator()) return CallInst::Create(DeclareFn, Args, Args+2, "", T); else return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);} @@ -1203,7 +1303,7 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), &V, 1), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), D }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); @@ -1221,21 +1321,21 @@ void DebugInfoFinder::processModule(Module &M) { ++BI) { if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) processDeclare(DDI); - + DebugLoc Loc = BI->getDebugLoc(); if (Loc.isUnknown()) continue; - + LLVMContext &Ctx = BI->getContext(); DIDescriptor Scope(Loc.getScope(Ctx)); - + if (Scope.isCompileUnit()) addCompileUnit(DICompileUnit(Scope)); else if (Scope.isSubprogram()) processSubprogram(DISubprogram(Scope)); else if (Scope.isLexicalBlock()) processLexicalBlock(DILexicalBlock(Scope)); - + if (MDNode *IA = Loc.getInlinedAt(Ctx)) processLocation(DILocation(IA)); } @@ -1380,7 +1480,7 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) { return 0; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast_or_null<MDNode>(NMD->getOperand(i))); + DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); if (!DIG.isGlobalVariable()) continue; if (DIGlobalVariable(DIG).getGlobal() == V) @@ -1393,16 +1493,16 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) { /// It looks through pointer casts too. static const DbgDeclareInst *findDbgDeclare(const Value *V) { V = V->stripPointerCasts(); - + if (!isa<Instruction>(V) && !isa<Argument>(V)) return 0; - + const Function *F = NULL; if (const Instruction *I = dyn_cast<Instruction>(V)) F = I->getParent()->getParent(); else if (const Argument *A = dyn_cast<Argument>(V)) F = A->getParent(); - + for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; ++BI) @@ -1460,10 +1560,10 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) { DIDescriptor D(Scope); if (D.isSubprogram()) return DISubprogram(Scope); - + if (D.isLexicalBlock()) return getDISubprogram(DILexicalBlock(Scope).getContext()); - + return DISubprogram(); } @@ -1471,9 +1571,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) { DICompositeType llvm::getDICompositeType(DIType T) { if (T.isCompositeType()) return DICompositeType(T); - + if (T.isDerivedType()) return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); - + return DICompositeType(); } diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index d95c3761bee62..9f340942f2ccb 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -86,99 +86,100 @@ namespace { struct DomViewer : public DOTGraphTraitsViewer<DominatorTree, false> { static char ID; - DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", &ID){} + DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){} }; struct DomOnlyViewer : public DOTGraphTraitsViewer<DominatorTree, true> { static char ID; - DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", &ID){} + DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){} }; struct PostDomViewer : public 
DOTGraphTraitsViewer<PostDominatorTree, false> { static char ID; PostDomViewer() : - DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", &ID){} + DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){} }; struct PostDomOnlyViewer : public DOTGraphTraitsViewer<PostDominatorTree, true> { static char ID; PostDomOnlyViewer() : - DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", &ID){} + DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){} }; } // end anonymous namespace char DomViewer::ID = 0; -RegisterPass<DomViewer> A("view-dom", - "View dominance tree of function"); +INITIALIZE_PASS(DomViewer, "view-dom", + "View dominance tree of function", false, false); char DomOnlyViewer::ID = 0; -RegisterPass<DomOnlyViewer> B("view-dom-only", - "View dominance tree of function " - "(with no function bodies)"); +INITIALIZE_PASS(DomOnlyViewer, "view-dom-only", + "View dominance tree of function (with no function bodies)", + false, false); char PostDomViewer::ID = 0; -RegisterPass<PostDomViewer> C("view-postdom", - "View postdominance tree of function"); +INITIALIZE_PASS(PostDomViewer, "view-postdom", + "View postdominance tree of function", false, false); char PostDomOnlyViewer::ID = 0; -RegisterPass<PostDomOnlyViewer> D("view-postdom-only", - "View postdominance tree of function " - "(with no function bodies)"); +INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only", + "View postdominance tree of function " + "(with no function bodies)", + false, false); namespace { struct DomPrinter : public DOTGraphTraitsPrinter<DominatorTree, false> { static char ID; - DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", &ID) {} + DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {} }; struct DomOnlyPrinter : public DOTGraphTraitsPrinter<DominatorTree, true> { static char ID; - DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", &ID) {} + DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {} }; struct PostDomPrinter : public DOTGraphTraitsPrinter<PostDominatorTree, false> { static char ID; PostDomPrinter() : - DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", &ID) {} + DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {} }; struct PostDomOnlyPrinter : public DOTGraphTraitsPrinter<PostDominatorTree, true> { static char ID; PostDomOnlyPrinter() : - DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", &ID) {} + DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {} }; } // end anonymous namespace char DomPrinter::ID = 0; -RegisterPass<DomPrinter> E("dot-dom", - "Print dominance tree of function " - "to 'dot' file"); +INITIALIZE_PASS(DomPrinter, "dot-dom", + "Print dominance tree of function to 'dot' file", + false, false); char DomOnlyPrinter::ID = 0; -RegisterPass<DomOnlyPrinter> F("dot-dom-only", - "Print dominance tree of function " - "to 'dot' file " - "(with no function bodies)"); +INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only", + "Print dominance tree of function to 'dot' file " + "(with no function bodies)", + false, false); char PostDomPrinter::ID = 0; -RegisterPass<PostDomPrinter> G("dot-postdom", - "Print postdominance tree of function " - "to 'dot' file"); +INITIALIZE_PASS(PostDomPrinter, "dot-postdom", + "Print postdominance tree of function to 'dot' file", + false, false); char PostDomOnlyPrinter::ID = 0; -RegisterPass<PostDomOnlyPrinter> H("dot-postdom-only", - "Print postdominance tree of function " - "to 'dot' file " - "(with no function 
bodies)"); +INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only", + "Print postdominance tree of function to 'dot' file " + "(with no function bodies)", + false, false); // Create methods available outside of this file, to use them // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 65c7c6efd8027..b3635283fda58 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -42,7 +42,7 @@ class BasicCallGraph : public ModulePass, public CallGraph { public: static char ID; // Class identification, replacement for typeinfo - BasicCallGraph() : ModulePass(&ID), Root(0), + BasicCallGraph() : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {} // runOnModule - Compute the call graph for the specified module. @@ -86,8 +86,8 @@ public: /// an analysis interface through multiple inheritance. If needed, it should /// override this to adjust the this pointer as needed for the specified pass /// info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&CallGraph::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &CallGraph::ID) return (CallGraph*)this; return this; } @@ -145,8 +145,8 @@ private: for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { - CallSite CS = CallSite::get(II); - if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(II)) { + CallSite CS(cast<Value>(II)); + if (CS && !isa<DbgInfoIntrinsic>(II)) { const Function *Callee = CS.getCalledFunction(); if (Callee) Node->addCalledFunction(CS, getOrInsertFunction(Callee)); @@ -172,9 +172,8 @@ private: } //End anonymous namespace static RegisterAnalysisGroup<CallGraph> X("Call Graph"); -static RegisterPass<BasicCallGraph> -Y("basiccg", "Basic CallGraph Construction", false, true); -static RegisterAnalysisGroup<CallGraph, true> Z(Y); +INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg", + "Basic CallGraph Construction", false, true, true); char CallGraph::ID = 0; char BasicCallGraph::ID = 0; diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 0c01ee5b82848..b7a27cb288d92 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -45,7 +45,7 @@ class CGPassManager : public ModulePass, public PMDataManager { public: static char ID; explicit CGPassManager(int Depth) - : ModulePass(&ID), PMDataManager(Depth) { } + : ModulePass(ID), PMDataManager(Depth) { } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. @@ -209,7 +209,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // If the call edge is not from a call or invoke, then the function // pass RAUW'd a call with another value. This can happen when // constant folding happens of well known functions etc. 
- CallSite::get(I->first).getInstruction() == 0) { + !CallSite(I->first)) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); @@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - CallSite CS = CallSite::get(I); - if (!CS.getInstruction() || isa<DbgInfoIntrinsic>(I)) continue; + CallSite CS(cast<Value>(I)); + if (!CS || isa<DbgInfoIntrinsic>(I)) continue; // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from CallSites. @@ -582,9 +582,9 @@ namespace { public: static char ID; - PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {} + PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {} PrintCallGraphPass(const std::string &B, raw_ostream &o) - : CallGraphSCCPass(&ID), Banner(B), Out(o) {} + : CallGraphSCCPass(ID), Banner(B), Out(o) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index c4fb0b9a4e3dd..8eed9d6f68bc5 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -23,8 +23,8 @@ using namespace llvm; char FindUsedTypes::ID = 0; -static RegisterPass<FindUsedTypes> -X("print-used-types", "Find Used Types", false, true); +INITIALIZE_PASS(FindUsedTypes, "print-used-types", + "Find Used Types", false, true); // IncorporateType - Incorporate one type and all of its subtypes into the // collection of used types. diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f13deea41d4ed..6759b0afdce39 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -47,14 +47,15 @@ namespace { /// GlobalInfo - Maintain mod/ref info for all of the globals without /// addresses taken that are read or written (transitively) by this /// function. - std::map<GlobalValue*, unsigned> GlobalInfo; + std::map<const GlobalValue*, unsigned> GlobalInfo; /// MayReadAnyGlobal - May read global variables, but it is not known which. bool MayReadAnyGlobal; - unsigned getInfoForGlobal(GlobalValue *GV) const { + unsigned getInfoForGlobal(const GlobalValue *GV) const { unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; - std::map<GlobalValue*, unsigned>::const_iterator I = GlobalInfo.find(GV); + std::map<const GlobalValue*, unsigned>::const_iterator I = + GlobalInfo.find(GV); if (I != GlobalInfo.end()) Effect |= I->second; return Effect; @@ -71,23 +72,23 @@ namespace { class GlobalsModRef : public ModulePass, public AliasAnalysis { /// NonAddressTakenGlobals - The globals that do not have their addresses /// taken. - std::set<GlobalValue*> NonAddressTakenGlobals; + std::set<const GlobalValue*> NonAddressTakenGlobals; /// IndirectGlobals - The memory pointed to by this global is known to be /// 'owned' by the global. - std::set<GlobalValue*> IndirectGlobals; + std::set<const GlobalValue*> IndirectGlobals; /// AllocsForIndirectGlobals - If an instruction allocates memory for an /// indirect global, this map indicates which one. - std::map<Value*, GlobalValue*> AllocsForIndirectGlobals; + std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; /// FunctionInfo - For each function, keep track of what globals are /// modified or read. 
- std::map<Function*, FunctionRecord> FunctionInfo; + std::map<const Function*, FunctionRecord> FunctionInfo; public: static char ID; - GlobalsModRef() : ModulePass(&ID) {} + GlobalsModRef() : ModulePass(ID) {} bool runOnModule(Module &M) { InitializeAliasAnalysis(this); // set up super class @@ -107,39 +108,39 @@ namespace { // AliasResult alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size); - ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); - ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { - return AliasAnalysis::getModRefInfo(CS1,CS2); + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1, CS2); } /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(Function *F, - std::vector<PointerAccessInfo> *Info) { + ModRefBehavior getModRefBehavior(const Function *F) { if (FunctionRecord *FR = getFunctionInfo(F)) { if (FR->FunctionEffect == 0) return DoesNotAccessMemory; else if ((FR->FunctionEffect & Mod) == 0) return OnlyReadsMemory; } - return AliasAnalysis::getModRefBehavior(F, Info); + return AliasAnalysis::getModRefBehavior(F); } /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. The call site may be null in which /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(CallSite CS, - std::vector<PointerAccessInfo> *Info) { - Function* F = CS.getCalledFunction(); - if (!F) return AliasAnalysis::getModRefBehavior(CS, Info); + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + const Function* F = CS.getCalledFunction(); + if (!F) return AliasAnalysis::getModRefBehavior(CS); if (FunctionRecord *FR = getFunctionInfo(F)) { if (FR->FunctionEffect == 0) return DoesNotAccessMemory; else if ((FR->FunctionEffect & Mod) == 0) return OnlyReadsMemory; } - return AliasAnalysis::getModRefBehavior(CS, Info); + return AliasAnalysis::getModRefBehavior(CS); } virtual void deleteValue(Value *V); @@ -149,8 +150,8 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } @@ -158,8 +159,9 @@ namespace { private: /// getFunctionInfo - Return the function info for the function, or null if /// we don't have anything useful to say about it. 
- FunctionRecord *getFunctionInfo(Function *F) { - std::map<Function*, FunctionRecord>::iterator I = FunctionInfo.find(F); + FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function*, FunctionRecord>::iterator I = + FunctionInfo.find(F); if (I != FunctionInfo.end()) return &I->second; return 0; @@ -175,9 +177,9 @@ namespace { } char GlobalsModRef::ID = 0; -static RegisterPass<GlobalsModRef> -X("globalsmodref-aa", "Simple mod/ref analysis for globals", false, true); -static RegisterAnalysisGroup<AliasAnalysis> Y(X); +INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false); Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } @@ -409,7 +411,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { FunctionEffect |= CalleeFR->FunctionEffect; // Incorporate callee's effects on globals into our info. - for (std::map<GlobalValue*, unsigned>::iterator GI = + for (std::map<const GlobalValue*, unsigned>::iterator GI = CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); GI != E; ++GI) FR.GlobalInfo[GI->first] |= GI->second; @@ -477,13 +479,13 @@ AliasAnalysis::AliasResult GlobalsModRef::alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size) { // Get the base object these pointers point to. - Value *UV1 = const_cast<Value*>(V1->getUnderlyingObject()); - Value *UV2 = const_cast<Value*>(V2->getUnderlyingObject()); + const Value *UV1 = V1->getUnderlyingObject(); + const Value *UV2 = V2->getUnderlyingObject(); // If either of the underlying values is a global, they may be non-addr-taken // globals, which we can answer queries about. - GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); - GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); + const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); + const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. @@ -503,12 +505,12 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size, // so, we may be able to handle this. First check to see if the base pointer // is a direct load from an indirect global. GV1 = GV2 = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UV1)) + if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) if (IndirectGlobals.count(GV)) GV1 = GV; - if (LoadInst *LI = dyn_cast<LoadInst>(UV2)) - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) if (IndirectGlobals.count(GV)) GV2 = GV; @@ -530,16 +532,17 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size, } AliasAnalysis::ModRefResult -GlobalsModRef::getModRefInfo(CallSite CS, Value *P, unsigned Size) { +GlobalsModRef::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { unsigned Known = ModRef; // If we are asking for mod/ref info of a direct call with a pointer to a // global we are tracking, return information if we have it. 
- if (GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject())) + if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject())) if (GV->hasLocalLinkage()) - if (Function *F = CS.getCalledFunction()) + if (const Function *F = CS.getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) - if (FunctionRecord *FR = getFunctionInfo(F)) + if (const FunctionRecord *FR = getFunctionInfo(F)) Known = FR->getInfoForGlobal(GV); if (Known == NoModRef) @@ -558,7 +561,7 @@ void GlobalsModRef::deleteValue(Value *V) { // any AllocRelatedValues for it. if (IndirectGlobals.erase(GV)) { // Remove any entries in AllocsForIndirectGlobals for this global. - for (std::map<Value*, GlobalValue*>::iterator + for (std::map<const Value*, const GlobalValue*>::iterator I = AllocsForIndirectGlobals.begin(), E = AllocsForIndirectGlobals.end(); I != E; ) { if (I->second == GV) { diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 2c997dae5859c..cdf667ad6eed9 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,7 +21,6 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Assembly/AsmAnnotationWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -29,8 +28,7 @@ using namespace llvm; char IVUsers::ID = 0; -static RegisterPass<IVUsers> -X("iv-users", "Induction Variable Users", false, true); +INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true); Pass *llvm::createIVUsersPass() { return new IVUsers(); @@ -39,27 +37,31 @@ Pass *llvm::createIVUsersPass() { /// isInteresting - Test whether the given expression is "interesting" when /// used by the given expression, within the context of analyzing the /// given loop. -static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) { - // Anything loop-invariant is interesting. - if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L)) - return true; - +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, + ScalarEvolution *SE) { // An addrec is interesting if it's affine or if it has an interesting start. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Keep things simple. Don't touch loop-variant strides. if (AR->getLoop() == L) return AR->isAffine() || !L->contains(I); - // Otherwise recurse to see if the start value is interesting. - return isInteresting(AR->getStart(), I, L); + // Otherwise recurse to see if the start value is interesting, and that + // the step value is not interesting, since we don't yet know how to + // do effective SCEV expansions for addrecs with interesting steps. + return isInteresting(AR->getStart(), I, L, SE) && + !isInteresting(AR->getStepRecurrence(*SE), I, L, SE); } - // An add is interesting if any of its operands is. + // An add is interesting if exactly one of its operands is interesting. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + bool AnyInterestingYet = false; for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); OI != OE; ++OI) - if (isInteresting(*OI, I, L)) - return true; - return false; + if (isInteresting(*OI, I, L, SE)) { + if (AnyInterestingYet) + return false; + AnyInterestingYet = true; + } + return AnyInterestingYet; } // Nothing else is interesting here. 
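The isInteresting rewrite above tightens the add rule: where any interesting operand used to qualify the whole add, now exactly one may be, presumably because expanding an add of two separate induction expressions has no good lowering (the addrec case states this outright for interesting steps). Below is a toy model of just that rule; the plain Expr struct stands in for SCEV and nothing here is the LLVM API.

#include <cstdio>
#include <vector>

// A leaf is "interesting" or not (think: an affine addrec in the current
// loop); a node with operands models a SCEVAddExpr.
struct Expr {
  bool Interesting;
  std::vector<const Expr *> AddOps; // non-empty => this node is an add
};

static bool isInteresting(const Expr *S) {
  if (S->AddOps.empty())
    return S->Interesting;
  // An add is interesting iff exactly one operand is: with two or more,
  // expansion would have to materialize several induction expressions.
  bool AnyInterestingYet = false;
  for (const Expr *Op : S->AddOps)
    if (isInteresting(Op)) {
      if (AnyInterestingYet)
        return false;
      AnyInterestingYet = true;
    }
  return AnyInterestingYet;
}

int main() {
  Expr IV1 = {true, {}};             // interesting leaf
  Expr IV2 = {true, {}};             // another interesting leaf
  Expr Inv = {false, {}};            // uninteresting (e.g. loop-invariant)
  Expr One = {false, {&IV1, &Inv}};  // exactly one interesting -> kept
  Expr Two = {false, {&IV1, &IV2}};  // two interesting -> now rejected
  std::printf("%d %d\n", isInteresting(&One) ? 1 : 0,
              isInteresting(&Two) ? 1 : 0);  // prints "1 0"
}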
@@ -85,7 +87,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // If we've come to an uninteresting expression, stop the traversal and // call this a user. - if (!isInteresting(ISE, I, L)) + if (!isInteresting(ISE, I, L, SE)) return false; SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -141,7 +143,7 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { } IVUsers::IVUsers() - : LoopPass(&ID) { + : LoopPass(ID) { } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { @@ -176,9 +178,6 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { } OS << ":\n"; - // Use a default AssemblyAnnotationWriter to suppress the default info - // comments, which aren't relevant here. - AssemblyAnnotationWriter Annotator; for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), E = IVUses.end(); UI != E; ++UI) { OS << " "; @@ -192,7 +191,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << ")"; } OS << " in "; - UI->getUser()->print(OS, &Annotator); + UI->getUser()->print(OS); OS << '\n'; } } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index b1df517c2a94f..3e550f35c2553 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -152,14 +152,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { if (isa<CallInst>(II) || isa<InvokeInst>(II)) { if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count as size. - - CallSite CS = CallSite::get(const_cast<Instruction*>(&*II)); - + + ImmutableCallSite CS(cast<Instruction>(II)); + // If this function contains a call to setjmp or _setjmp, never inline // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they // probably won't do this in callers. - if (Function *F = CS.getCalledFunction()) { + if (const Function *F = CS.getCalledFunction()) { if (F->isDeclaration() && (F->getName() == "setjmp" || F->getName() == "_setjmp")) callsSetJmp = true; diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index bb2cf53c85efd..dcbcac005a2fc 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -51,7 +51,7 @@ namespace { } public: static char ID; // Pass identification, replacement for typeid - InstCount() : FunctionPass(&ID) {} + InstCount() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -64,8 +64,8 @@ namespace { } char InstCount::ID = 0; -static RegisterPass<InstCount> -X("instcount", "Counts the various types of Instructions", false, true); +INITIALIZE_PASS(InstCount, "instcount", + "Counts the various types of Instructions", false, true); FunctionPass *llvm::createInstCountPass() { return new InstCount(); } diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index 1f17b77a5b96f..1c9e14884316b 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -16,8 +16,8 @@ using namespace llvm; char IntervalPartition::ID = 0; -static RegisterPass<IntervalPartition> -X("intervals", "Interval Partition Construction", true, true); +INITIALIZE_PASS(IntervalPartition, "intervals", + "Interval Partition Construction", true, true); //===----------------------------------------------------------------------===// // IntervalPartition Implementation @@ -91,7 +91,7 @@ bool IntervalPartition::runOnFunction(Function &F) { // distinguish it from a copy constructor. Always pass in false for now. 
// IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) - : FunctionPass(&ID) { + : FunctionPass(ID) { assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!"); // Pass false to intervals_begin because we take ownership of its memory diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index ff9026bede97e..e32dbc4447135 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -19,16 +19,18 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; char LazyValueInfo::ID = 0; -static RegisterPass<LazyValueInfo> -X("lazy-value-info", "Lazy Value Information Analysis", false, true); +INITIALIZE_PASS(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true); namespace llvm { FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); } @@ -50,12 +52,15 @@ class LVILatticeVal { enum LatticeValueTy { /// undefined - This LLVM Value has no known value yet. undefined, + /// constant - This LLVM Value has a specific constant value. constant, - /// notconstant - This LLVM value is known to not have the specified value. notconstant, + /// constantrange - This LLVM Value is known to fall within the specified range of values. + constantrange, + /// overdefined - This instruction is not known to be constant, and we know /// it has a value. overdefined }; /// Val: This stores the current lattice value along with the Constant* for /// the constant if this is a 'constant' or 'notconstant' value.
- PointerIntPair<Constant *, 2, LatticeValueTy> Val; + LatticeValueTy Tag; + Constant *Val; + ConstantRange Range; public: - LVILatticeVal() : Val(0, undefined) {} + LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} static LVILatticeVal get(Constant *C) { LVILatticeVal Res; - Res.markConstant(C); + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1)); + else if (!isa<UndefValue>(C)) + Res.markConstant(C); return Res; } static LVILatticeVal getNot(Constant *C) { LVILatticeVal Res; - Res.markNotConstant(C); + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + else + Res.markNotConstant(C); + return Res; + } + static LVILatticeVal getRange(ConstantRange CR) { + LVILatticeVal Res; + Res.markConstantRange(CR); return Res; } - bool isUndefined() const { return Val.getInt() == undefined; } - bool isConstant() const { return Val.getInt() == constant; } - bool isNotConstant() const { return Val.getInt() == notconstant; } - bool isOverdefined() const { return Val.getInt() == overdefined; } + bool isUndefined() const { return Tag == undefined; } + bool isConstant() const { return Tag == constant; } + bool isNotConstant() const { return Tag == notconstant; } + bool isConstantRange() const { return Tag == constantrange; } + bool isOverdefined() const { return Tag == overdefined; } Constant *getConstant() const { assert(isConstant() && "Cannot get the constant of a non-constant!"); - return Val.getPointer(); + return Val; } Constant *getNotConstant() const { assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); - return Val.getPointer(); + return Val; + } + + ConstantRange getConstantRange() const { + assert(isConstantRange() && + "Cannot get the constant-range of a non-constant-range!"); + return Range; } /// markOverdefined - Return true if this is a change in status. bool markOverdefined() { if (isOverdefined()) return false; - Val.setInt(overdefined); + Tag = overdefined; return true; } @@ -110,9 +135,9 @@ public: } assert(isUndefined()); - Val.setInt(constant); + Tag = constant; assert(V && "Marking constant with NULL"); - Val.setPointer(V); + Val = V; return true; } @@ -128,9 +153,29 @@ public: else assert(isUndefined()); - Val.setInt(notconstant); + Tag = notconstant; assert(V && "Marking constant with NULL"); - Val.setPointer(V); + Val = V; + return true; + } + + /// markConstantRange - Return true if this is a change in status. 
+ bool markConstantRange(const ConstantRange NewR) { + if (isConstantRange()) { + if (NewR.isEmptySet()) + return markOverdefined(); + + bool changed = Range != NewR; + Range = NewR; + return changed; + } + + assert(isUndefined()); + if (NewR.isEmptySet()) + return markOverdefined(); + + Tag = constantrange; + Range = NewR; return true; } @@ -147,20 +192,39 @@ public: isa<ConstantExpr>(RHS.getNotConstant())) return markOverdefined(); return false; - } - if (isConstant()) { + } else if (isConstant()) { if (getConstant() == RHS.getNotConstant() || isa<ConstantExpr>(RHS.getNotConstant()) || isa<ConstantExpr>(getConstant())) return markOverdefined(); return markNotConstant(RHS.getNotConstant()); + } else if (isConstantRange()) { + return markOverdefined(); } assert(isUndefined() && "Unexpected lattice"); return markNotConstant(RHS.getNotConstant()); } + if (RHS.isConstantRange()) { + if (isConstantRange()) { + ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); + if (NewR.isFullSet()) + return markOverdefined(); + else + return markConstantRange(NewR); + } else if (!isUndefined()) { + return markOverdefined(); + } + + assert(isUndefined() && "Unexpected lattice"); + return markConstantRange(RHS.getConstantRange()); + } + // RHS must be a constant, we must be undef, constant, or notconstant. + assert(!isConstantRange() && + "Constant and ConstantRange cannot be merged."); + if (isUndefined()) return markConstant(RHS.getConstant()); @@ -191,6 +255,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { if (Val.isNotConstant()) return OS << "notconstant<" << *Val.getNotConstant() << '>'; + else if (Val.isConstantRange()) + return OS << "constantrange<" << Val.getConstantRange().getLower() << ", " + << Val.getConstantRange().getUpper() << '>'; return OS << "constant<" << *Val.getConstant() << '>'; } } @@ -206,17 +273,41 @@ namespace { public: /// BlockCacheEntryTy - This is a computed lattice value at the end of the /// specified basic block for a Value* that depends on context. - typedef std::pair<BasicBlock*, LVILatticeVal> BlockCacheEntryTy; + typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy; /// ValueCacheEntryTy - This is all of the cached block information for /// exactly one Value*. The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. - typedef std::vector<BlockCacheEntryTy> ValueCacheEntryTy; + typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; private: + /// LVIValueHandle - A callback value handle that updates the cache when + /// values are erased. + struct LVIValueHandle : public CallbackVH { + LazyValueInfoCache *Parent; + + LVIValueHandle(Value *V, LazyValueInfoCache *P) + : CallbackVH(V), Parent(P) { } + + void deleted(); + void allUsesReplacedWith(Value* V) { + deleted(); + } + + LVIValueHandle &operator=(Value *V) { + return *this = LVIValueHandle(V, Parent); + } + }; + /// ValueCache - This is all of the cached information for all values, /// mapped from Value* to key information. - DenseMap<Value*, ValueCacheEntryTy> ValueCache; + std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; + + /// OverDefinedCache - This tracks, on a per-block basis, the set of + /// values that are over-defined at the end of that block. This is required + /// for cache updating.
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache; + public: /// getValueInBlock - This is the query interface to determine the lattice @@ -226,29 +317,23 @@ namespace { /// getValueOnEdge - This is the query interface to determine the lattice /// value for the specified Value* that is true on the specified edge. LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB); - }; -} // end anonymous namespace - -namespace { - struct BlockCacheEntryComparator { - static int Compare(const void *LHSv, const void *RHSv) { - const LazyValueInfoCache::BlockCacheEntryTy *LHS = - static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(LHSv); - const LazyValueInfoCache::BlockCacheEntryTy *RHS = - static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(RHSv); - if (LHS->first < RHS->first) - return -1; - if (LHS->first > RHS->first) - return 1; - return 0; - } - bool operator()(const LazyValueInfoCache::BlockCacheEntryTy &LHS, - const LazyValueInfoCache::BlockCacheEntryTy &RHS) const { - return LHS.first < RHS.first; + /// threadEdge - This is the update interface to inform the cache that an + /// edge from PredBB to OldSucc has been threaded to be from PredBB to + /// NewSucc. + void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); + + /// eraseBlock - This is part of the update interface to inform the cache + /// that a block has been deleted. + void eraseBlock(BasicBlock *BB); + + /// clear - Empty the cache. + void clear() { + ValueCache.clear(); + OverDefinedCache.clear(); } }; -} +} // end anonymous namespace //===----------------------------------------------------------------------===// // LVIQuery Impl @@ -267,78 +352,87 @@ namespace { /// This is the current value being queried for. Value *Val; + /// This is a pointer to the owning cache, for recursive queries. + LazyValueInfoCache &Parent; + /// This is all of the cached information about this value. ValueCacheEntryTy &Cache; + /// This tracks, for each block, what values are overdefined. + std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache; + /// NewBlocks - This is a mapping of the new BasicBlocks which have been /// added to cache but that are not in sorted order. - DenseMap<BasicBlock*, LVILatticeVal> NewBlockInfo; + DenseSet<BasicBlock*> NewBlockInfo; + public: - LVIQuery(Value *V, ValueCacheEntryTy &VC) : Val(V), Cache(VC) { + LVIQuery(Value *V, LazyValueInfoCache &P, + ValueCacheEntryTy &VC, + std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC) + : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) { } ~LVIQuery() { // When the query is done, insert the newly discovered facts into the // cache in sorted order. if (NewBlockInfo.empty()) return; - - // Grow the cache to exactly fit the new data. - Cache.reserve(Cache.size() + NewBlockInfo.size()); - // If we only have one new entry, insert it instead of doing a full-on - // sort. - if (NewBlockInfo.size() == 1) { - BlockCacheEntryTy Entry = *NewBlockInfo.begin(); - ValueCacheEntryTy::iterator I = - std::lower_bound(Cache.begin(), Cache.end(), Entry, - BlockCacheEntryComparator()); - assert((I == Cache.end() || I->first != Entry.first) && - "Entry already in map!"); - - Cache.insert(I, Entry); - return; + for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(), + E = NewBlockInfo.end(); I != E; ++I) { + if (Cache[*I].isOverdefined()) + OverDefinedCache.insert(std::make_pair(*I, Val)); } - - // TODO: If we only have two new elements, INSERT them both. 
- - Cache.insert(Cache.end(), NewBlockInfo.begin(), NewBlockInfo.end()); - array_pod_sort(Cache.begin(), Cache.end(), - BlockCacheEntryComparator::Compare); - } LVILatticeVal getBlockValue(BasicBlock *BB); LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB); private: - LVILatticeVal &getCachedEntryForBlock(BasicBlock *BB); + LVILatticeVal getCachedEntryForBlock(BasicBlock *BB); }; } // end anonymous namespace -/// getCachedEntryForBlock - See if we already have a value for this block. If -/// so, return it, otherwise create a new entry in the NewBlockInfo map to use. -LVILatticeVal &LVIQuery::getCachedEntryForBlock(BasicBlock *BB) { - - // Do a binary search to see if we already have an entry for this block in - // the cache set. If so, find it. - if (!Cache.empty()) { - ValueCacheEntryTy::iterator Entry = - std::lower_bound(Cache.begin(), Cache.end(), - BlockCacheEntryTy(BB, LVILatticeVal()), - BlockCacheEntryComparator()); - if (Entry != Cache.end() && Entry->first == BB) - return Entry->second; +void LazyValueInfoCache::LVIValueHandle::deleted() { + for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator + I = Parent->OverDefinedCache.begin(), + E = Parent->OverDefinedCache.end(); + I != E; ) { + std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I; + ++I; + if (tmp->second == getValPtr()) + Parent->OverDefinedCache.erase(tmp); } - // Otherwise, check to see if it's in NewBlockInfo or create a new entry if - // not. - return NewBlockInfo[BB]; + // This erasure deallocates *this, so it MUST happen after we're done + // using any and all members of *this. + Parent->ValueCache.erase(*this); +} + +void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator + I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) { + std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I; + ++I; + if (tmp->first == BB) + OverDefinedCache.erase(tmp); + } + + for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator + I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) + I->second.erase(BB); +} + +/// getCachedEntryForBlock - See if we already have a value for this block. If +/// so, return it, otherwise create a new entry in the Cache map to use. +LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) { + NewBlockInfo.insert(BB); + return Cache[BB]; } LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { // See if we already have a value for this block. - LVILatticeVal &BBLV = getCachedEntryForBlock(BB); + LVILatticeVal BBLV = getCachedEntryForBlock(BB); // If we've already computed this block's value, return it. if (!BBLV.isUndefined()) { @@ -350,13 +444,28 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { // lattice value to overdefined, so that cycles will terminate and be // conservatively correct. BBLV.markOverdefined(); + Cache[BB] = BBLV; - // If V is live into BB, see if our predecessors know anything about it. Instruction *BBI = dyn_cast<Instruction>(Val); if (BBI == 0 || BBI->getParent() != BB) { LVILatticeVal Result; // Start Undefined. - unsigned NumPreds = 0; + // If this is a pointer, and there's a load from that pointer in this BB, + // then we know that the pointer can't be NULL. 
+ bool NotNull = false; + if (Val->getType()->isPointerTy()) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ + LoadInst *L = dyn_cast<LoadInst>(BI); + if (L && L->getPointerAddressSpace() == 0 && + L->getPointerOperand()->getUnderlyingObject() == + Val->getUnderlyingObject()) { + NotNull = true; + break; + } + } + } + + unsigned NumPreds = 0; // Loop over all of our predecessors, merging what we know from them into // result. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { @@ -367,11 +476,19 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { if (Result.isOverdefined()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because of pred.\n"); + // If we previously determined that this is a pointer that can't be null + // then return that rather than giving up entirely. + if (NotNull) { + const PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } + return Result; } ++NumPreds; } + // If this is the entry block, we must be asking about an argument. The // value is overdefined. if (NumPreds == 0 && BB == &BB->getParent()->front()) { @@ -382,24 +499,123 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) { // Return the merged value, which is more precise than 'overdefined'. assert(!Result.isOverdefined()); - return getCachedEntryForBlock(BB) = Result; + return Cache[BB] = Result; } // If this value is defined by an instruction in this block, we have to // process it here somehow or return overdefined. if (PHINode *PN = dyn_cast<PHINode>(BBI)) { - (void)PN; - // TODO: PHI Translation in preds. - } else { + LVILatticeVal Result; // Start Undefined. + // Loop over all of our predecessors, merging what we know from them into + // result. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + Value* PhiVal = PN->getIncomingValueForBlock(*PI); + Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB)); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + return Result; + } + } + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined()); + return Cache[BB] = Result; } - - DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because inst def found.\n"); + assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?"); + + // We can only analyze the definitions of certain classes of instructions + // (integral binops and casts at the moment), so bail if this isn't one. LVILatticeVal Result; - Result.markOverdefined(); - return getCachedEntryForBlock(BB) = Result; + if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) || + !BBI->getType()->isIntegerTy()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + Result.markOverdefined(); + return Result; + } + + // FIXME: We're currently limited to binops with a constant RHS. This should + // be improved. + BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI); + if (BO && !isa<ConstantInt>(BO->getOperand(1))) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + + Result.markOverdefined(); + return Result; + } + + // Figure out the range of the LHS. If that fails, bail. 
+ LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB); + if (!LHSVal.isConstantRange()) { + Result.markOverdefined(); + return Result; + } + + ConstantInt *RHS = 0; + ConstantRange LHSRange = LHSVal.getConstantRange(); + ConstantRange RHSRange(1); + const IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); + if (isa<BinaryOperator>(BBI)) { + RHS = dyn_cast<ConstantInt>(BBI->getOperand(1)); + if (!RHS) { + Result.markOverdefined(); + return Result; + } + + RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1); + } + + // NOTE: We're currently limited by the set of operations that ConstantRange + // can evaluate symbolically. Enhancing that set will allow us to analyze + // more definitions. + switch (BBI->getOpcode()) { + case Instruction::Add: + Result.markConstantRange(LHSRange.add(RHSRange)); + break; + case Instruction::Sub: + Result.markConstantRange(LHSRange.sub(RHSRange)); + break; + case Instruction::Mul: + Result.markConstantRange(LHSRange.multiply(RHSRange)); + break; + case Instruction::UDiv: + Result.markConstantRange(LHSRange.udiv(RHSRange)); + break; + case Instruction::Shl: + Result.markConstantRange(LHSRange.shl(RHSRange)); + break; + case Instruction::LShr: + Result.markConstantRange(LHSRange.lshr(RHSRange)); + break; + case Instruction::Trunc: + Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth())); + break; + case Instruction::SExt: + Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth())); + break; + case Instruction::ZExt: + Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth())); + break; + case Instruction::BitCast: + Result.markConstantRange(LHSRange); + break; + + // Unhandled instructions are overdefined. + default: + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + Result.markOverdefined(); + break; + } + + return Cache[BB] = Result; } @@ -420,28 +636,57 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) { // it is. if (BI->getCondition() == Val) return LVILatticeVal::get(ConstantInt::get( - Type::getInt1Ty(Val->getContext()), isTrueDest)); + Type::getInt1Ty(Val->getContext()), isTrueDest)); // If the condition of the branch is an equality comparison, we may be // able to infer the value. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) - if (ICI->isEquality() && ICI->getOperand(0) == Val && - isa<Constant>(ICI->getOperand(1))) { + ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()); + if (ICI && ICI->getOperand(0) == Val && + isa<Constant>(ICI->getOperand(1))) { + if (ICI->isEquality()) { // We know that V has the RHS constant if this is a true SETEQ or // false SETNE. if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) { + // Calculate the range of values that would satisfy the comparison. + ConstantRange CmpRange(CI->getValue(), CI->getValue()+1); + ConstantRange TrueValues = + ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + + // If we're interested in the false dest, invert the condition. + if (!isTrueDest) TrueValues = TrueValues.inverse(); + + // Figure out the possible values of the query BEFORE this branch.
+ LVILatticeVal InBlock = getBlockValue(BBFrom); + if (!InBlock.isConstantRange()) + return LVILatticeVal::getRange(TrueValues); + + // Find all potential values that satisfy both the input and output + // conditions. + ConstantRange PossibleValues = + TrueValues.intersectWith(InBlock.getConstantRange()); + + return LVILatticeVal::getRange(PossibleValues); + } + } } } // If the edge was formed by a switch on the value, then we may know exactly // what it is. if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { - // If BBTo is the default destination of the switch, we don't know anything. - // Given a more powerful range analysis we could know stuff. - if (SI->getCondition() == Val && SI->getDefaultDest() != BBTo) { + if (SI->getCondition() == Val) { + // We don't know anything in the default case. + if (SI->getDefaultDest() == BBTo) { + LVILatticeVal Result; + Result.markOverdefined(); + return Result; + } + // We only know something if there is exactly one value that goes from // BBFrom to BBTo. unsigned NumEdges = 0; @@ -474,7 +719,9 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - LVILatticeVal Result = LVIQuery(V, ValueCache[V]).getBlockValue(BB); + LVILatticeVal Result = LVIQuery(V, *this, + ValueCache[LVIValueHandle(V, this)], + OverDefinedCache).getBlockValue(BB); DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; @@ -488,24 +735,80 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); + LVILatticeVal Result = - LVIQuery(V, ValueCache[V]).getEdgeValue(FromBB, ToBB); + LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)], + OverDefinedCache).getEdgeValue(FromBB, ToBB); DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } +void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be possible + // to solve now. We do NOT try to proactively update these values. Instead, + // we clear their entries from the cache, and allow lazy updating to recompute + // them when needed. + + // The updating process is fairly simple: we need to drop cached info + // for all values that were marked overdefined in OldSucc, and for those same + // values in any successor of OldSucc (except NewSucc) in which they were + // also marked overdefined. + std::vector<BasicBlock*> worklist; + worklist.push_back(OldSucc); + + DenseSet<Value*> ClearSet; + for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator + I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) { + if (I->first == OldSucc) + ClearSet.insert(I->second); + } + + // Use a worklist to perform a depth-first search of OldSucc's successors. + // NOTE: We do not need a visited list since any blocks we have already + // visited will have had their overdefined markers cleared already, and we + // thus won't loop to their successors. + while (!worklist.empty()) { + BasicBlock *ToUpdate = worklist.back(); + worklist.pop_back(); + + // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue; + + bool changed = false; + for (DenseSet<Value*>::iterator I = ClearSet.begin(),E = ClearSet.end(); + I != E; ++I) { + // If a value was marked overdefined in OldSucc, and is here too... + std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI = + OverDefinedCache.find(std::make_pair(ToUpdate, *I)); + if (OI == OverDefinedCache.end()) continue; + + // Remove it from the caches. + ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)]; + ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); + + assert(CI != Entry.end() && "Couldn't find entry to update?"); + Entry.erase(CI); + OverDefinedCache.erase(OI); + + // If we removed anything, then we potentially need to update + // the block's successors too. + changed = true; + } + + if (!changed) continue; + + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); + } +} + //===----------------------------------------------------------------------===// // LazyValueInfo Impl //===----------------------------------------------------------------------===// -bool LazyValueInfo::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable<TargetData>(); - // Fully lazy. - return false; -} - /// getCache - This lazily constructs the LazyValueInfoCache. static LazyValueInfoCache &getCache(void *&PImpl) { if (!PImpl) @@ -513,6 +816,15 @@ static LazyValueInfoCache &getCache(void *&PImpl) { return *static_cast<LazyValueInfoCache*>(PImpl); } +bool LazyValueInfo::runOnFunction(Function &F) { + if (PImpl) + getCache(PImpl).clear(); + + TD = getAnalysisIfAvailable<TargetData>(); + // Fully lazy. + return false; +} + void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { @@ -526,6 +838,11 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { if (Result.isConstant()) return Result.getConstant(); + else if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } return 0; } @@ -537,6 +854,11 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, if (Result.isConstant()) return Result.getConstant(); + else if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } return 0; } @@ -557,6 +879,36 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, return Unknown; } + if (Result.isConstantRange()) { + ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return Unknown; + + ConstantRange CR = Result.getConstantRange(); + if (Pred == ICmpInst::ICMP_EQ) { + if (!CR.contains(CI->getValue())) + return False; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return True; + } else if (Pred == ICmpInst::ICMP_NE) { + if (!CR.contains(CI->getValue())) + return True; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return False; + } + + // Handle more complex predicates. + ConstantRange RHS(CI->getValue(), CI->getValue()+1); + ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS); + if (CR.intersectWith(TrueValues).isEmptySet()) + return False; + else if (TrueValues.contains(CR)) + return True; + + return Unknown; + } + if (Result.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1".
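As an aside, the constant-range tri-state used in getPredicateOnEdge reduces to three ConstantRange operations. Here is a minimal standalone sketch, not part of the patch, that mirrors the hunk above using only ConstantRange calls that appear in it; the helper name evalPredicate is hypothetical.

#include "llvm/ADT/APInt.h"
#include "llvm/Support/ConstantRange.h"
using namespace llvm;

// Decide predicate Pred for a value known to lie in CR, compared against the
// constant C. Returns 1 = always true, 0 = always false, -1 = unknown.
static int evalPredicate(unsigned Pred, const ConstantRange &CR,
                         const APInt &C) {
  ConstantRange RHS(C, C + 1);                        // the singleton set {C}
  ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS);
  if (CR.intersectWith(TrueValues).isEmptySet())
    return 0;    // no value in CR can satisfy the predicate
  if (TrueValues.contains(CR))
    return 1;    // every value in CR satisfies it
  return -1;     // CR straddles the boundary; give up
}

For example, with CR = [0, 4) over i8, ICMP_ULT against 10 yields 1 (the whole range is below 10), while ICMP_EQ against 9 yields 0 (the intersection is empty).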
@@ -579,4 +931,11 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, return Unknown; } +void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock* NewSucc) { + if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc); +} +void LazyValueInfo::eraseBlock(BasicBlock *BB) { + if (PImpl) getCache(PImpl).eraseBlock(BB); +} diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index 7419659298902..7f51202ecb558 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -20,11 +20,8 @@ using namespace llvm; // Register this pass... char LibCallAliasAnalysis::ID = 0; -static RegisterPass<LibCallAliasAnalysis> -X("libcall-aa", "LibCall Alias Analysis", false, true); - -// Declare that we implement the AliasAnalysis interface -static RegisterAnalysisGroup<AliasAnalysis> Y(X); +INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", + "LibCall Alias Analysis", false, true, false); FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { return new LibCallAliasAnalysis(LCI); @@ -46,7 +43,7 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { /// vs the specified pointer/size. AliasAnalysis::ModRefResult LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - CallSite CS, Value *P, + ImmutableCallSite CS, const Value *P, unsigned Size) { // If we have a function, check to see what kind of mod/ref effects it // has. Start by including any info globally known about the function. @@ -120,13 +117,14 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, // specified memory object. // AliasAnalysis::ModRefResult -LibCallAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { +LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { ModRefResult MRInfo = ModRef; // If this is a direct call to a function that LCI knows about, get the // information about the runtime function. if (LCI) { - if (Function *F = CS.getCalledFunction()) { + if (const Function *F = CS.getCalledFunction()) { if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size)); if (MRInfo == NoModRef) return NoModRef; diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index e0060c3e89b1a..81b0f46f3740e 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -40,7 +40,8 @@ const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { /// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to /// the specified function if we have it. If not, return null. 
-const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const { +const LibCallFunctionInfo * +LibCallInfo::getFunctionInfo(const Function *F) const { StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl); /// If this is the first time we are querying for this info, lazily construct diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 9f1b30d2cf45c..a9d972435f5fb 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -108,7 +108,7 @@ namespace { raw_string_ostream MessagesStr; static char ID; // Pass identification, replacement for typeid - Lint() : FunctionPass(&ID), MessagesStr(Messages) {} + Lint() : FunctionPass(ID), MessagesStr(Messages) {} virtual bool runOnFunction(Function &F); @@ -167,8 +167,7 @@ namespace { } char Lint::ID = 0; -static RegisterPass<Lint> -X("lint", "Statically lint-checks LLVM IR", false, true); +INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true); // Assert - We know that cond should be true, if not print an error message. #define Assert(C, M) \ @@ -247,8 +246,7 @@ void Lint::visitCallSite(CallSite CS) { // where nothing is known. if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { - Assert1(AI == BI || - AA->alias(*AI, ~0u, *BI, ~0u) != AliasAnalysis::MustAlias, + Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias, "Unusual: noalias argument aliases another argument", &I); } @@ -520,6 +518,9 @@ void Lint::visitVAArgInst(VAArgInst &I) { void Lint::visitIndirectBrInst(IndirectBrInst &I) { visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee); + + Assert1(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); } void Lint::visitExtractElementInst(ExtractElementInst &I) { diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp index 23964ffc457ed..0225f4fa25486 100644 --- a/lib/Analysis/LiveValues.cpp +++ b/lib/Analysis/LiveValues.cpp @@ -22,10 +22,10 @@ namespace llvm { } char LiveValues::ID = 0; -static RegisterPass<LiveValues> -X("live-values", "Value Liveness Analysis", false, true); +INITIALIZE_PASS(LiveValues, "live-values", + "Value Liveness Analysis", false, true); -LiveValues::LiveValues() : FunctionPass(&ID) {} +LiveValues::LiveValues() : FunctionPass(ID) {} void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index e1019474cf431..82c02dcd13425 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -46,8 +46,8 @@ LoopPass *llvm::createLoopDependenceAnalysisPass() { return new LoopDependenceAnalysis(); } -static RegisterPass<LoopDependenceAnalysis> -R("lda", "Loop Dependence Analysis", false, true); +INITIALIZE_PASS(LoopDependenceAnalysis, "lda", + "Loop Dependence Analysis", false, true); char LoopDependenceAnalysis::ID = 0; //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 818d0a9dd1146..46219d1b6f55c 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -38,8 +38,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), cl::desc("Verify loop info (time consuming)")); char LoopInfo::ID = 0; -static RegisterPass<LoopInfo> -X("loops", "Natural Loop Information", true, true); +INITIALIZE_PASS(LoopInfo, "loops", "Natural 
Loop Information", true, true); //===----------------------------------------------------------------------===// // Loop implementation @@ -124,14 +123,13 @@ PHINode *Loop::getCanonicalInductionVariable() const { BasicBlock *H = getHeader(); BasicBlock *Incoming = 0, *Backedge = 0; - typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits; - InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H); - assert(PI != InvBlockTraits::child_end(H) && + pred_iterator PI = pred_begin(H); + assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; - if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop + if (PI == pred_end(H)) return 0; // dead loop Incoming = *PI++; - if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges? + if (PI != pred_end(H)) return 0; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) @@ -157,18 +155,6 @@ PHINode *Loop::getCanonicalInductionVariable() const { return 0; } -/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds -/// the canonical induction variable value for the "next" iteration of the -/// loop. This always succeeds if getCanonicalInductionVariable succeeds. -/// -Instruction *Loop::getCanonicalInductionVariableIncrement() const { - if (PHINode *PN = getCanonicalInductionVariable()) { - bool P1InLoop = contains(PN->getIncomingBlock(1)); - return cast<Instruction>(PN->getIncomingValue(P1InLoop)); - } - return 0; -} - /// getTripCount - Return a loop-invariant LLVM value indicating the number of /// times the loop will be executed. Note that this means that the backedge /// of the loop executes N-1 times. If the trip-count cannot be determined, @@ -180,12 +166,12 @@ Instruction *Loop::getCanonicalInductionVariableIncrement() const { Value *Loop::getTripCount() const { // Canonical loops will end with a 'cmp ne I, V', where I is the incremented // canonical induction variable and V is the trip count of the loop. - Instruction *Inc = getCanonicalInductionVariableIncrement(); - if (Inc == 0) return 0; - PHINode *IV = cast<PHINode>(Inc->getOperand(0)); + PHINode *IV = getCanonicalInductionVariable(); + if (IV == 0 || IV->getNumIncomingValues() != 2) return 0; - BasicBlock *BackedgeBlock = - IV->getIncomingBlock(contains(IV->getIncomingBlock(1))); + bool P0InLoop = contains(IV->getIncomingBlock(0)); + Value *Inc = IV->getIncomingValue(!P0InLoop); + BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop); if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator())) if (BI->isConditional()) { @@ -341,16 +327,12 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { BasicBlock *current = *BI; switchExitBlocks.clear(); - typedef GraphTraits<BasicBlock *> BlockTraits; - typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits; - for (BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) { + for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) { // If block is inside the loop then it is not a exit block. 
if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) continue; - InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I); + pred_iterator PI = pred_begin(*I); BasicBlock *firstPred = *PI; // If current basic block is this exit block's first predecessor @@ -363,8 +345,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { // If a terminator has more than two successors, for example SwitchInst, // then it is possible that there are multiple edges from current block // to one exit block. - if (std::distance(BlockTraits::child_begin(current), - BlockTraits::child_end(current)) <= 2) { + if (std::distance(succ_begin(current), succ_end(current)) <= 2) { ExitBlocks.push_back(*I); continue; } diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 2727d2f9465c1..15d4db8f5f98e 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -30,9 +30,9 @@ private: public: static char ID; - PrintLoopPass() : LoopPass(&ID), Out(dbgs()) {} + PrintLoopPass() : LoopPass(ID), Out(dbgs()) {} PrintLoopPass(const std::string &B, raw_ostream &o) - : LoopPass(&ID), Banner(B), Out(o) {} + : LoopPass(ID), Banner(B), Out(o) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -59,7 +59,7 @@ char PrintLoopPass::ID = 0; char LPPassManager::ID = 0; LPPassManager::LPPassManager(int Depth) - : FunctionPass(&ID), PMDataManager(Depth) { + : FunctionPass(ID), PMDataManager(Depth) { skipThisLoop = false; redoThisLoop = false; LI = NULL; @@ -183,7 +183,7 @@ void LPPassManager::redoLoop(Loop *L) { void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *LP = (LoopPass *)getContainedPass(Index); + LoopPass *LP = getContainedPass(Index); LP->cloneBasicBlockAnalysis(From, To, L); } } @@ -198,7 +198,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { } } for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *LP = (LoopPass *)getContainedPass(Index); + LoopPass *LP = getContainedPass(Index); LP->deleteAnalysisValue(V, L); } } @@ -240,7 +240,7 @@ bool LPPassManager::runOnFunction(Function &F) { I != E; ++I) { Loop *L = *I; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *P = (LoopPass*)getContainedPass(Index); + LoopPass *P = getContainedPass(Index); Changed |= P->doInitialization(L, *this); } } @@ -254,7 +254,7 @@ bool LPPassManager::runOnFunction(Function &F) { // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *P = (LoopPass*)getContainedPass(Index); + LoopPass *P = getContainedPass(Index); dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); @@ -320,7 +320,7 @@ bool LPPassManager::runOnFunction(Function &F) { // Finalization for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - LoopPass *P = (LoopPass *)getContainedPass(Index); + LoopPass *P = getContainedPass(Index); Changed |= P->doFinalization(); } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 1f54d740db9de..d18d5ce0ea4cb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -46,11 +46,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr, char MemoryDependenceAnalysis::ID = 0; // Register this pass...
-static RegisterPass<MemoryDependenceAnalysis> X("memdep", - "Memory Dependence Analysis", false, true); +INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true); MemoryDependenceAnalysis::MemoryDependenceAnalysis() -: FunctionPass(&ID), PredCache(0) { +: FunctionPass(ID), PredCache(0) { } MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { } @@ -120,33 +120,21 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, Pointer = CI->getArgOperand(0); // calls to free() erase the entire structure PointerSize = ~0ULL; - } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) { + } else if (CallSite InstCS = cast<Value>(Inst)) { // Debug intrinsics don't cause dependences. if (isa<DbgInfoIntrinsic>(Inst)) continue; - CallSite InstCS = CallSite::get(Inst); // If these two calls do not interfere, look past it. switch (AA->getModRefInfo(CS, InstCS)) { case AliasAnalysis::NoModRef: - // If the two calls don't interact (e.g. InstCS is readnone) keep - // scanning. + // If the two calls are the same, return InstCS as a Def, so that + // CS can be found redundant and eliminated. + if (isReadOnlyCall && InstCS.onlyReadsMemory() && + CS.getInstruction()->isIdenticalToWhenDefined(Inst)) + return MemDepResult::getDef(Inst); + + // Otherwise if the two calls don't interact (e.g. InstCS is readnone) + // keep scanning. continue; - case AliasAnalysis::Ref: - // If the two calls read the same memory locations and CS is a readonly - // function, then we have two cases: 1) the calls may not interfere with - // each other at all. 2) the calls may produce the same value. In case - // #1 we want to ignore the values, in case #2, we want to return Inst - // as a Def dependence. This allows us to CSE in cases like: - // X = strlen(P); - // memchr(...); - // Y = strlen(P); // Y = X - if (isReadOnlyCall) { - if (CS.getCalledFunction() != 0 && - CS.getCalledFunction() == InstCS.getCalledFunction()) - return MemDepResult::getDef(Inst); - // Ignore unrelated read/read call dependences. - continue; - } - // FALL THROUGH default: return MemDepResult::getClobber(Inst); } @@ -196,8 +184,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(II->getArgOperand(2), ~0U, MemPtr, ~0U); + AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr); if (R == AliasAnalysis::MustAlias) { InvariantTag = II->getArgOperand(0); continue; @@ -209,8 +196,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. 
- AliasAnalysis::AliasResult R = - AA->alias(II->getArgOperand(1), ~0U, MemPtr, ~0U); + AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr); if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(II); } @@ -387,7 +373,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); break; default: - CallSite QueryCS = CallSite::get(QueryInst); + CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, QueryParent); diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index 556d4c8aab54a..2cc1c2aa005ca 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -30,7 +30,7 @@ namespace { DebugInfoFinder Finder; public: static char ID; // Pass identification, replacement for typeid - ModuleDebugInfoPrinter() : ModulePass(&ID) {} + ModuleDebugInfoPrinter() : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -42,9 +42,8 @@ namespace { } char ModuleDebugInfoPrinter::ID = 0; -static RegisterPass<ModuleDebugInfoPrinter> -X("module-debuginfo", - "Decodes module-level debug info", false, true); +INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo", + "Decodes module-level debug info", false, true); ModulePass *llvm::createModuleDebugInfoPrinterPass() { return new ModuleDebugInfoPrinter(); diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index 14df0b7198791..07f46824700a8 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -28,7 +28,7 @@ using namespace llvm; char PointerTracking::ID = 0; -PointerTracking::PointerTracking() : FunctionPass(&ID) {} +PointerTracking::PointerTracking() : FunctionPass(ID) {} bool PointerTracking::runOnFunction(Function &F) { predCache.clear(); @@ -144,6 +144,55 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P, return SE->getCouldNotCompute(); } +Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const +{ + Value *V = P->stripPointerCasts(); + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + Ty = AI->getAllocatedType(); + // arraySize elements of type Ty. + return AI->getArraySize(); + } + + if (CallInst *CI = extractMallocCall(V)) { + Ty = getMallocAllocatedType(CI); + if (!Ty) + return 0; + Value *arraySize = getMallocArraySize(CI, TD); + if (!arraySize) { + Ty = Type::getInt8Ty(P->getContext()); + return CI->getArgOperand(0); + } + // arraySize elements of type Ty. + return arraySize; + } + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + Ty = ATy->getElementType(); + return ConstantInt::get(Type::getInt32Ty(P->getContext()), + ATy->getNumElements()); + } + } + Ty = cast<PointerType>(GV->getType())->getElementType(); + return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1); + //TODO: implement more tracking for globals + } + + if (CallInst *CI = dyn_cast<CallInst>(V)) { + CallSite CS(CI); + Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (F == reallocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // realloc allocates arg1 bytes. + return CS.getArgument(1); + } + } + + return 0; +} + // Calculates the number of elements of type Ty allocated for P. 
const SCEV *PointerTracking::computeAllocationCountForType(Value *P, const Type *Ty) @@ -263,5 +312,5 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const { } } -static RegisterPass<PointerTracking> X("pointertracking", - "Track pointer bounds", false, true); +INITIALIZE_PASS(PointerTracking, "pointertracking", + "Track pointer bounds", false, true); diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 7354afa181b2a..cbe8d1867e4f1 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -28,8 +28,8 @@ using namespace llvm; char PostDominatorTree::ID = 0; char PostDominanceFrontier::ID = 0; -static RegisterPass<PostDominatorTree> -F("postdomtree", "Post-Dominator Tree Construction", true, true); +INITIALIZE_PASS(PostDominatorTree, "postdomtree", + "Post-Dominator Tree Construction", true, true); bool PostDominatorTree::runOnFunction(Function &F) { DT->recalculate(F); @@ -53,8 +53,8 @@ FunctionPass* llvm::createPostDomTree() { // PostDominanceFrontier Implementation //===----------------------------------------------------------------------===// -static RegisterPass<PostDominanceFrontier> -H("postdomfrontier", "Post-Dominance Frontier Construction", true, true); +INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier", + "Post-Dominance Frontier Construction", true, true); const DominanceFrontier::DomSetType & PostDominanceFrontier::calculate(const PostDominatorTree &DT, diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index da4ce47692624..ecc0a1845307a 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -39,7 +39,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo explicit ProfileEstimatorPass(const double execcount = 0) - : FunctionPass(&ID), ExecCount(execcount) { + : FunctionPass(ID), ExecCount(execcount) { if (execcount == 0) ExecCount = LoopWeight; } @@ -59,8 +59,8 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. 
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&ProfileInfo::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) return (ProfileInfo*)this; return this; } @@ -72,13 +72,11 @@ namespace { } // End of anonymous namespace char ProfileEstimatorPass::ID = 0; -static RegisterPass<ProfileEstimatorPass> -X("profile-estimator", "Estimate profiling information", false, true); - -static RegisterAnalysisGroup<ProfileInfo> Y(X); +INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator", + "Estimate profiling information", false, true, false); namespace llvm { - const PassInfo *ProfileEstimatorPassID = &X; + char &ProfileEstimatorPassID = ProfileEstimatorPass::ID; FunctionPass *createProfileEstimatorPass() { return new ProfileEstimatorPass(); diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 8d2712fd6e063..fc7f28662c017 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -1076,14 +1076,14 @@ raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, con namespace { struct NoProfileInfo : public ImmutablePass, public ProfileInfo { static char ID; // Class identification, replacement for typeinfo - NoProfileInfo() : ImmutablePass(&ID) {} + NoProfileInfo() : ImmutablePass(ID) {} /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&ProfileInfo::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) return (ProfileInfo*)this; return this; } @@ -1096,10 +1096,7 @@ namespace { char NoProfileInfo::ID = 0; // Register this pass... -static RegisterPass<NoProfileInfo> -X("no-profile", "No Profile Information", false, true); - -// Declare that we implement the ProfileInfo interface -static RegisterAnalysisGroup<ProfileInfo, true> Y(X); +INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile", + "No Profile Information", false, true, true); ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); } diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 8ea4ecf54f98f..d325b574e8482 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -45,7 +45,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo explicit LoaderPass(const std::string &filename = "") - : ModulePass(&ID), Filename(filename) { + : ModulePass(ID), Filename(filename) { if (filename.empty()) Filename = ProfileInfoFilename; } @@ -67,8 +67,8 @@ namespace { /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. 
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&ProfileInfo::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) return (ProfileInfo*)this; return this; } @@ -79,12 +79,10 @@ } // End of anonymous namespace char LoaderPass::ID = 0; -static RegisterPass<LoaderPass> -X("profile-loader", "Load profile information from llvmprof.out", false, true); +INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader", + "Load profile information from llvmprof.out", false, true, false); -static RegisterAnalysisGroup<ProfileInfo> Y(X); - -const PassInfo *llvm::ProfileLoaderPassID = &X; +char &llvm::ProfileLoaderPassID = LoaderPass::ID; ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); } diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index 5d87e14a97b4f..3f01b2d592bc4 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -59,10 +59,10 @@ namespace llvm { public: static char ID; // Class identification, replacement for typeinfo - explicit ProfileVerifierPassT () : FunctionPass(&ID) { + explicit ProfileVerifierPassT () : FunctionPass(ID) { DisableAssertions = ProfileVerifierDisableAssertions; } - explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID), + explicit ProfileVerifierPassT (bool da) : FunctionPass(ID), DisableAssertions(da) { } @@ -366,8 +366,8 @@ namespace llvm { char ProfileVerifierPassT<FType, BType>::ID = 0; } -static RegisterPass<ProfileVerifierPass> -X("profile-verifier", "Verify profiling information", false, true); +INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier", + "Verify profiling information", false, true); namespace llvm { FunctionPass *createProfileVerifierPass() { diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp new file mode 100644 index 0000000000000..abc057a773a9f --- /dev/null +++ b/lib/Analysis/RegionInfo.cpp @@ -0,0 +1,749 @@ +//===- RegionInfo.cpp - SESE region detection analysis --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Detects single entry single exit regions in the control flow graph. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/LoopInfo.h" + +#define DEBUG_TYPE "region" +#include "llvm/Support/Debug.h" + +#include <set> +#include <algorithm> + +using namespace llvm; + +// Always verify if expensive checking is enabled. +#ifdef XDEBUG +static bool VerifyRegionInfo = true; +#else +static bool VerifyRegionInfo = false; +#endif + +static cl::opt<bool,true> +VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), + cl::desc("Verify region info (time consuming)")); + +STATISTIC(numRegions, "The # of regions"); +STATISTIC(numSimpleRegions, "The # of simple regions"); + +//===----------------------------------------------------------------------===// +/// PrintStyle - Print region in different ways.
+enum PrintStyle { PrintNone, PrintBB, PrintRN }; + +cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden, + cl::desc("style of printing regions"), + cl::values( + clEnumValN(PrintNone, "none", "print no details"), + clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"), + clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"), + clEnumValEnd)); +//===----------------------------------------------------------------------===// +/// Region Implementation +Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, + DominatorTree *dt, Region *Parent) + : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} + +Region::~Region() { + // Free the cached nodes. + for (BBNodeMapT::iterator it = BBNodeMap.begin(), + ie = BBNodeMap.end(); it != ie; ++it) + delete it->second; + + // Only clean the cache for this Region. Caches of child Regions will be + // cleaned when the child Regions are deleted. + BBNodeMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +bool Region::contains(const BasicBlock *B) const { + BasicBlock *BB = const_cast<BasicBlock*>(B); + + assert(DT->getNode(BB) && "BB not part of the dominance tree"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // Toplevel region. + if (!exit) + return true; + + return (DT->dominates(entry, BB) + && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); +} + +bool Region::contains(const Loop *L) const { + // BBs that are not part of any loop are elements of the Loop + // described by the NULL pointer. This loop is not part of any region, + // except if the region describes the whole function. + if (L == 0) + return getExit() == 0; + + if (!contains(L->getHeader())) + return false; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(), + BE = ExitingBlocks.end(); BI != BE; ++BI) + if (!contains(*BI)) + return false; + + return true; +} + +Loop *Region::outermostLoopInRegion(Loop *L) const { + if (!contains(L)) + return 0; + + while (L && contains(L->getParentLoop())) { + L = L->getParentLoop(); + } + + return L; +} + +Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { + assert(LI && BB && "LI and BB cannot be null!"); + Loop *L = LI->getLoopFor(BB); + return outermostLoopInRegion(L); +} + +bool Region::isSimple() const { + bool isSimple = true; + bool found = false; + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // TopLevelRegion + if (!exit) + return false; + + for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; + ++PI) { + BasicBlock *Pred = *PI; + if (DT->getNode(Pred) && !contains(Pred)) { + if (found) { + isSimple = false; + break; + } + found = true; + } + } + + found = false; + + for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; + ++PI) + if (contains(*PI)) { + if (found) { + isSimple = false; + break; + } + found = true; + } + + return isSimple; +} + +std::string Region::getNameStr() const { + std::string exitName; + std::string entryName; + + if (getEntry()->getName().empty()) { + raw_string_ostream OS(entryName); + + WriteAsOperand(OS, getEntry(), false); + entryName = OS.str(); + } else + entryName = getEntry()->getNameStr(); + + if (getExit()) { + if (getExit()->getName().empty()) { + raw_string_ostream OS(exitName); + + WriteAsOperand(OS, getExit(), false); + exitName = OS.str(); + } else + exitName = 
getExit()->getNameStr(); + } else + exitName = "<Function Return>"; + + return entryName + " => " + exitName; +} + +void Region::verifyBBInRegion(BasicBlock *BB) const { + if (!contains(BB)) + llvm_unreachable("Broken region found!"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (!contains(*SI) && exit != *SI) + llvm_unreachable("Broken region found!"); + + if (entry != BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) + if (!contains(*SI)) + llvm_unreachable("Broken region found!"); +} + +void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const { + BasicBlock *exit = getExit(); + + visited->insert(BB); + + verifyBBInRegion(BB); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (*SI != exit && visited->find(*SI) == visited->end()) + verifyWalk(*SI, visited); +} + +void Region::verifyRegion() const { + // Only do verification when user wants to, otherwise this expensive + // check will be invoked by PassManager. + if (!VerifyRegionInfo) return; + + std::set<BasicBlock*> visited; + verifyWalk(getEntry(), &visited); +} + +void Region::verifyRegionNest() const { + for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->verifyRegionNest(); + + verifyRegion(); +} + +Region::block_iterator Region::block_begin() { + return GraphTraits<FlatIt<Region*> >::nodes_begin(this); +} + +Region::block_iterator Region::block_end() { + return GraphTraits<FlatIt<Region*> >::nodes_end(this); +} + +Region::const_block_iterator Region::block_begin() const { + return GraphTraits<FlatIt<const Region*> >::nodes_begin(this); +} + +Region::const_block_iterator Region::block_end() const { + return GraphTraits<FlatIt<const Region*> >::nodes_end(this); +} + +Region::element_iterator Region::element_begin() { + return GraphTraits<Region*>::nodes_begin(this); +} + +Region::element_iterator Region::element_end() { + return GraphTraits<Region*>::nodes_end(this); +} + +Region::const_element_iterator Region::element_begin() const { + return GraphTraits<const Region*>::nodes_begin(this); +} + +Region::const_element_iterator Region::element_end() const { + return GraphTraits<const Region*>::nodes_end(this); +} + +Region* Region::getSubRegionNode(BasicBlock *BB) const { + Region *R = RI->getRegionFor(BB); + + if (!R || R == this) + return 0; + + // If we pass the BB out of this region, that means our code is broken. 
+ assert(contains(R) && "BB not in current region!"); + + while (contains(R->getParent()) && R->getParent() != this) + R = R->getParent(); + + if (R->getEntry() != BB) + return 0; + + return R; +} + +RegionNode* Region::getBBNode(BasicBlock *BB) const { + assert(contains(BB) && "Cannot get BB node out of this region!"); + + BBNodeMapT::const_iterator at = BBNodeMap.find(BB); + + if (at != BBNodeMap.end()) + return at->second; + + RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB); + BBNodeMap.insert(std::make_pair(BB, NewNode)); + return NewNode; +} + +RegionNode* Region::getNode(BasicBlock *BB) const { + assert(contains(BB) && "Cannot get BB node out of this region!"); + if (Region* Child = getSubRegionNode(BB)) + return Child->getNode(); + + return getBBNode(BB); +} + +void Region::transferChildrenTo(Region *To) { + for (iterator I = begin(), E = end(); I != E; ++I) { + (*I)->parent = To; + To->children.push_back(*I); + } + children.clear(); +} + +void Region::addSubRegion(Region *SubRegion) { + assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); + SubRegion->parent = this; + // Set up the region node. + assert(std::find(children.begin(), children.end(), SubRegion) == children.end() + && "Node already exists!"); + children.push_back(SubRegion); +} + + +Region *Region::removeSubRegion(Region *Child) { + assert(Child->parent == this && "Child is not a child of this region!"); + Child->parent = 0; + RegionSet::iterator I = std::find(children.begin(), children.end(), Child); + assert(I != children.end() && "Region does not exist. Unable to remove."); + children.erase(children.begin()+(I-begin())); + return Child; +} + +unsigned Region::getDepth() const { + unsigned Depth = 0; + + for (Region *R = parent; R != 0; R = R->parent) + ++Depth; + + return Depth; +} + +void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { + if (print_tree) + OS.indent(level*2) << "[" << level << "] " << getNameStr(); + else + OS.indent(level*2) << getNameStr(); + + OS << "\n"; + + + if (printStyle != PrintNone) { + OS.indent(level*2) << "{\n"; + OS.indent(level*2 + 2); + + if (printStyle == PrintBB) { + for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last "," + } else if (printStyle == PrintRN) { + for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last "," + } + + OS << "\n"; + } + + if (print_tree) + for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->print(OS, print_tree, level+1); + + if (printStyle != PrintNone) + OS.indent(level*2) << "} \n"; +} + +void Region::dump() const { + print(dbgs(), true, getDepth()); +} + +void Region::clearNodeCache() { + BBNodeMap.clear(); + for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->clearNodeCache(); +} + +//===----------------------------------------------------------------------===// +// RegionInfo implementation +// + +bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry, + BasicBlock *exit) const { + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(entry, P) && !DT->dominates(exit, P)) + return false; + } + return true; +} + +bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + typedef DominanceFrontier::DomSetType DST; + + DST *entrySuccs = &DF->find(entry)->second; + + // Exit 
is the header of a loop that contains the entry. In this case, + // the dominance frontier must only contain the exit. + if (!DT->dominates(entry, exit)) { + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) + if (*SI != exit && *SI != entry) + return false; + + return true; + } + + DST *exitSuccs = &DF->find(exit)->second; + + // Do not allow edges leaving the region. + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) { + if (*SI == exit || *SI == entry) + continue; + if (exitSuccs->find(*SI) == exitSuccs->end()) + return false; + if (!isCommonDomFrontier(*SI, entry, exit)) + return false; + } + + // Do not allow edges pointing into the region. + for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); + SI != SE; ++SI) + if (DT->properlyDominates(entry, *SI) && *SI != exit) + return false; + + + return true; +} + +void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, + BBtoBBMap *ShortCut) const { + assert(entry && exit && "entry and exit must not be null!"); + + BBtoBBMap::iterator e = ShortCut->find(exit); + + if (e == ShortCut->end()) + // No further region at exit available. + (*ShortCut)[entry] = exit; + else { + // We found a region e that starts at exit. Therefore (entry, e->second) + // is also a region, that is larger than (entry, exit). Insert the + // larger one. + BasicBlock *BB = e->second; + (*ShortCut)[entry] = BB; + } +} + +DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, + BBtoBBMap *ShortCut) const { + BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); + + if (e == ShortCut->end()) + return N->getIDom(); + + return PDT->getNode(e->second)->getIDom(); +} + +bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + + unsigned num_successors = succ_end(entry) - succ_begin(entry); + + if (num_successors <= 1 && exit == *(succ_begin(entry))) + return true; + + return false; +} + +void RegionInfo::updateStatistics(Region *R) { + ++numRegions; + + // TODO: Slow. Should only be enabled if -stats is used. + if (R->isSimple()) ++numSimpleRegions; +} + +Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { + assert(entry && exit && "entry and exit must not be null!"); + + if (isTrivialRegion(entry, exit)) + return 0; + + Region *region = new Region(entry, exit, this, DT); + BBtoRegion.insert(std::make_pair(entry, region)); + + #ifdef XDEBUG + region->verifyRegion(); + #else + DEBUG(region->verifyRegion()); + #endif + + updateStatistics(region); + return region; +} + +void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { + assert(entry); + + DomTreeNode *N = PDT->getNode(entry); + + if (!N) + return; + + Region *lastRegion= 0; + BasicBlock *lastExit = entry; + + // As only a BasicBlock that postdominates entry can finish a region, walk the + // post dominance tree upwards. + while ((N = getNextPostDom(N, ShortCut))) { + BasicBlock *exit = N->getBlock(); + + if (!exit) + break; + + if (isRegion(entry, exit)) { + Region *newRegion = createRegion(entry, exit); + + if (lastRegion) + newRegion->addSubRegion(lastRegion); + + lastRegion = newRegion; + lastExit = exit; + } + + // This can never be a region, so stop the search. + if (!DT->dominates(entry, exit)) + break; + } + + // Tried to create regions from entry to lastExit. Next time take a + // shortcut from entry to lastExit. 
+ if (lastExit != entry) + insertShortCut(entry, lastExit, ShortCut); +} + +void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) { + BasicBlock *entry = &(F.getEntryBlock()); + DomTreeNode *N = DT->getNode(entry); + + // Iterate over the dominance tree in post order to start with the small + // regions from the bottom of the dominance tree. If the small regions are + // detected first, detection of bigger regions is faster, as we can jump + // over the small regions. + for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE; + ++FI) { + findRegionsWithEntry(FI->getBlock(), ShortCut); + } +} + +Region *RegionInfo::getTopMostParent(Region *region) { + while (region->parent) + region = region->getParent(); + + return region; +} + +void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { + BasicBlock *BB = N->getBlock(); + + // Passed region exit + while (BB == region->getExit()) + region = region->getParent(); + + BBtoRegionMap::iterator it = BBtoRegion.find(BB); + + // This basic block is a start block of a region. It is already in the + // BBtoRegion relation. Only the child basic blocks have to be updated. + if (it != BBtoRegion.end()) { + Region *newRegion = it->second; + region->addSubRegion(getTopMostParent(newRegion)); + region = newRegion; + } else { + BBtoRegion[BB] = region; + } + + for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI) + buildRegionsTree(*CI, region); +} + +void RegionInfo::releaseMemory() { + BBtoRegion.clear(); + if (TopLevelRegion) + delete TopLevelRegion; + TopLevelRegion = 0; +} + +RegionInfo::RegionInfo() : FunctionPass(ID) { + TopLevelRegion = 0; +} + +RegionInfo::~RegionInfo() { + releaseMemory(); +} + +void RegionInfo::Calculate(Function &F) { + // ShortCut is a map in which, for every BB, the exit of the largest region + // starting with BB is stored. These regions can be treated as single BBs. + // This improves performance on linear CFGs. + BBtoBBMap ShortCut; + + scanForRegions(F, &ShortCut); + BasicBlock *BB = &F.getEntryBlock(); + buildRegionsTree(DT->getNode(BB), TopLevelRegion); +} + +bool RegionInfo::runOnFunction(Function &F) { + releaseMemory(); + + DT = &getAnalysis<DominatorTree>(); + PDT = &getAnalysis<PostDominatorTree>(); + DF = &getAnalysis<DominanceFrontier>(); + + TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); + updateStatistics(TopLevelRegion); + + Calculate(F); + + return false; +} + +void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<PostDominatorTree>(); + AU.addRequired<DominanceFrontier>(); +} + +void RegionInfo::print(raw_ostream &OS, const Module *) const { + OS << "Region tree:\n"; + TopLevelRegion->print(OS, true, 0); + OS << "End region tree\n"; +} + +void RegionInfo::verifyAnalysis() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finishes. + if (!VerifyRegionInfo) return; + + TopLevelRegion->verifyRegionNest(); +} + +// Region pass manager support. +Region *RegionInfo::getRegionFor(BasicBlock *BB) const { + BBtoRegionMap::const_iterator I = + BBtoRegion.find(BB); + return I != BBtoRegion.end() ? 
I->second : 0; +} + +Region *RegionInfo::operator[](BasicBlock *BB) const { + return getRegionFor(BB); +} + + +BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { + BasicBlock *Exit = NULL; + + while (true) { + // Get largest region that starts at BB. + Region *R = getRegionFor(BB); + while (R && R->getParent() && R->getParent()->getEntry() == BB) + R = R->getParent(); + + // Get the single exit of BB. + if (R && R->getEntry() == BB) + Exit = R->getExit(); + else if (++succ_begin(BB) == succ_end(BB)) + Exit = *succ_begin(BB); + else // No single exit exists. + return Exit; + + // Get largest region that starts at Exit. + Region *ExitR = getRegionFor(Exit); + while (ExitR && ExitR->getParent() + && ExitR->getParent()->getEntry() == Exit) + ExitR = ExitR->getParent(); + + for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE; + ++PI) + if (!R->contains(*PI) && !ExitR->contains(*PI)) + break; + + // This stops infinite cycles. + if (DT->dominates(Exit, BB)) + break; + + BB = Exit; + } + + return Exit; +} + +Region* +RegionInfo::getCommonRegion(Region *A, Region *B) const { + assert(A && B && "One of the Regions is NULL"); + + if (A->contains(B)) return A; + + while (!B->contains(A)) + B = B->getParent(); + + return B; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const { + Region* ret = Regions.back(); + Regions.pop_back(); + + for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(), + E = Regions.end(); I != E; ++I) + ret = getCommonRegion(ret, *I); + + return ret; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const { + Region* ret = getRegionFor(BBs.back()); + BBs.pop_back(); + + for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(), + E = BBs.end(); I != E; ++I) + ret = getCommonRegion(ret, getRegionFor(*I)); + + return ret; +} + +char RegionInfo::ID = 0; +INITIALIZE_PASS(RegionInfo, "regions", + "Detect single entry single exit regions", true, true); + +// Create methods available outside of this file, to be used in +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +namespace llvm { + FunctionPass *createRegionInfoPass() { + return new RegionInfo(); + } +} + diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp new file mode 100644 index 0000000000000..fee5c1bae9764 --- /dev/null +++ b/lib/Analysis/RegionPrinter.cpp @@ -0,0 +1,186 @@ +//===- RegionPrinter.cpp - Print regions tree pass ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegions - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+                  cl::desc("Show only simple regions in the graphviz viewer"),
+                  cl::Hidden,
+                  cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits (bool isSimple = false)
+    : DefaultDOTGraphTraits(isSimple) {}
+
+  std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+    if (!Node->isSubRegion()) {
+      BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+      if (isSimple())
+        return DOTGraphTraits<const Function*>
+          ::getSimpleNodeLabel(BB, BB->getParent());
+      else
+        return DOTGraphTraits<const Function*>
+          ::getCompleteNodeLabel(BB, BB->getParent());
+    }
+
+    return "Not implemented";
+  }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+  DOTGraphTraits (bool isSimple = false)
+    : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+  static std::string getGraphName(RegionInfo *DT) {
+    return "Region Graph";
+  }
+
+  std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+    return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+                                                     G->getTopLevelRegion());
+  }
+
+  // Print the cluster of the subregions. This groups the single basic blocks
+  // and adds a different background color for each group.
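+  // (Each nesting depth picks a different entry of the 12-color "paired12"
+  // scheme set up in addCustomGraphFeatures below, so regions at the same
+  // depth share a background color.)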
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, + unsigned depth = 0) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; + + if (!onlySimpleRegions || R->isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 1) << "\n"; + + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 2) << "\n"; + } + + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + printRegionCluster(*RI, GW, depth + 1); + + RegionInfo *RI = R->getRegionInfo(); + + for (Region::const_block_iterator BI = R->block_begin(), + BE = R->block_end(); BI != BE; ++BI) { + BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>(); + if (RI->getRegionFor(BB) == R) + O.indent(2 * (depth + 1)) << "Node" + << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB)) + << ";\n"; + } + + O.indent(2 * depth) << "}\n"; + } + + static void addCustomGraphFeatures(const RegionInfo* RI, + GraphWriter<RegionInfo*> &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(RI->getTopLevelRegion(), GW, 4); + } +}; +} //end namespace llvm + +namespace { + +struct RegionViewer + : public DOTGraphTraitsViewer<RegionInfo, false> { + static char ID; + RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){} +}; + +char RegionViewer::ID = 0; +INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", + true, true); + +struct RegionOnlyViewer + : public DOTGraphTraitsViewer<RegionInfo, true> { + static char ID; + RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){} +}; + +char RegionOnlyViewer::ID = 0; +INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", + "View regions of function (with no function bodies)", + true, true); + +struct RegionPrinter + : public DOTGraphTraitsPrinter<RegionInfo, false> { + static char ID; + RegionPrinter() : + DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {} +}; +} //end anonymous namespace + +char RegionPrinter::ID = 0; +INITIALIZE_PASS(RegionPrinter, "dot-regions", + "Print regions of function to 'dot' file", true, true); + +namespace { + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter<RegionInfo, true> { + static char ID; + RegionOnlyPrinter() : + DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {} +}; + +} + +char RegionOnlyPrinter::ID = 0; +INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file " + "(with no function bodies)", + true, true); + +FunctionPass* llvm::createRegionViewerPass() { + return new RegionViewer(); +} + +FunctionPass* llvm::createRegionOnlyViewerPass() { + return new RegionOnlyViewer(); +} + +FunctionPass* llvm::createRegionPrinterPass() { + return new RegionPrinter(); +} + +FunctionPass* llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); +} + diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 413b3b47f92a4..b892d85f9f4a2 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -103,8 +103,8 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, "derived loop"), cl::init(100)); -static RegisterPass<ScalarEvolution> -R("scalar-evolution", "Scalar Evolution Analysis", false, true); 
+INITIALIZE_PASS(ScalarEvolution, "scalar-evolution",
+                "Scalar Evolution Analysis", false, true);
 char ScalarEvolution::ID = 0;
 
 //===----------------------------------------------------------------------===//
@@ -251,28 +251,59 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
   OS << "(";
   for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
     OS << **I;
-    if (next(I) != E)
+    if (llvm::next(I) != E)
       OS << OpStr;
   }
   OS << ")";
 }
 
 bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
-    if (!getOperand(i)->dominates(BB, DT))
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+    if (!(*I)->dominates(BB, DT))
       return false;
-  }
   return true;
 }
 
 bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
-    if (!getOperand(i)->properlyDominates(BB, DT))
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+    if (!(*I)->properlyDominates(BB, DT))
       return false;
-  }
   return true;
 }
 
+bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const {
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+    if (!(*I)->isLoopInvariant(L))
+      return false;
+  return true;
+}
+
+// hasComputableLoopEvolution - N-ary expressions have computable loop
+// evolutions iff they have at least one operand that varies with the loop,
+// and all varying operands are themselves computable.
+bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const {
+  bool HasVarying = false;
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+    const SCEV *S = *I;
+    if (!S->isLoopInvariant(L)) {
+      if (S->hasComputableLoopEvolution(L))
+        HasVarying = true;
+      else
+        return false;
+    }
+  }
+  return HasVarying;
+}
+
+bool SCEVNAryExpr::hasOperand(const SCEV *O) const {
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+    const SCEV *S = *I;
+    if (O == S || S->hasOperand(O))
+      return true;
+  }
+  return false;
+}
+
 bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
 }
@@ -303,10 +334,14 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
   if (QueryLoop->contains(L))
     return false;
 
+  // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop.
+  if (L->contains(QueryLoop))
+    return true;
+
   // This recurrence is variant w.r.t. QueryLoop if any of its operands
   // are variant.
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (!getOperand(i)->isLoopInvariant(QueryLoop))
+  for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+    if (!(*I)->isLoopInvariant(QueryLoop))
       return false;
 
   // Otherwise it's loop-invariant.
@@ -337,12 +372,36 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << ">";
 }
 
+void SCEVUnknown::deleted() {
+  // Clear this SCEVUnknown from ValuesAtScopes.
+  SE->ValuesAtScopes.erase(this);
+
+  // Remove this SCEVUnknown from the uniquing map.
+  SE->UniqueSCEVs.RemoveNode(this);
+
+  // Release the value.
+  setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+  // Clear this SCEVUnknown from ValuesAtScopes.
+  SE->ValuesAtScopes.erase(this);
+
+  // Remove this SCEVUnknown from the uniquing map.
+  SE->UniqueSCEVs.RemoveNode(this);
+
+  // Update this SCEVUnknown to point to the new value. This is needed
+  // because there may still be outstanding SCEVs that point to
+  // this SCEVUnknown.
+ setValPtr(New); +} + bool SCEVUnknown::isLoopInvariant(const Loop *L) const { // All non-instruction values are loop invariant. All instructions are loop // invariant if they are not contained in the specified loop. // Instructions are never considered invariant in the function body // (null loop) because they are defined within the "loop". - if (Instruction *I = dyn_cast<Instruction>(V)) + if (Instruction *I = dyn_cast<Instruction>(getValue())) return L && !L->contains(I); return true; } @@ -360,11 +419,11 @@ bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { } const Type *SCEVUnknown::getType() const { - return V->getType(); + return getValue()->getType(); } bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { - if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V)) + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -381,7 +440,7 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { } bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { - if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V)) + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -406,7 +465,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { } bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { - if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V)) + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) if (CE->getOpcode() == Instruction::GetElementPtr && @@ -448,166 +507,183 @@ void SCEVUnknown::print(raw_ostream &OS) const { } // Otherwise just print it normally. 
-  WriteAsOperand(OS, V, false);
+  WriteAsOperand(OS, getValue(), false);
 }
 
 //===----------------------------------------------------------------------===//
 //                           SCEV Utilities
 //===----------------------------------------------------------------------===//
 
-static bool CompareTypes(const Type *A, const Type *B) {
-  if (A->getTypeID() != B->getTypeID())
-    return A->getTypeID() < B->getTypeID();
-  if (const IntegerType *AI = dyn_cast<IntegerType>(A)) {
-    const IntegerType *BI = cast<IntegerType>(B);
-    return AI->getBitWidth() < BI->getBitWidth();
-  }
-  if (const PointerType *AI = dyn_cast<PointerType>(A)) {
-    const PointerType *BI = cast<PointerType>(B);
-    return CompareTypes(AI->getElementType(), BI->getElementType());
-  }
-  if (const ArrayType *AI = dyn_cast<ArrayType>(A)) {
-    const ArrayType *BI = cast<ArrayType>(B);
-    if (AI->getNumElements() != BI->getNumElements())
-      return AI->getNumElements() < BI->getNumElements();
-    return CompareTypes(AI->getElementType(), BI->getElementType());
-  }
-  if (const VectorType *AI = dyn_cast<VectorType>(A)) {
-    const VectorType *BI = cast<VectorType>(B);
-    if (AI->getNumElements() != BI->getNumElements())
-      return AI->getNumElements() < BI->getNumElements();
-    return CompareTypes(AI->getElementType(), BI->getElementType());
-  }
-  if (const StructType *AI = dyn_cast<StructType>(A)) {
-    const StructType *BI = cast<StructType>(B);
-    if (AI->getNumElements() != BI->getNumElements())
-      return AI->getNumElements() < BI->getNumElements();
-    for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i)
-      if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) ||
-          CompareTypes(BI->getElementType(i), AI->getElementType(i)))
-        return CompareTypes(AI->getElementType(i), BI->getElementType(i));
-  }
-  return false;
-}
-
 namespace {
   /// SCEVComplexityCompare - Return true if the complexity of the LHS is less
   /// than the complexity of the RHS.  This comparator is used to canonicalize
   /// expressions.
   class SCEVComplexityCompare {
-    LoopInfo *LI;
+    const LoopInfo *const LI;
   public:
-    explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
+    explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
 
+    // Return true if LHS is less than RHS, false otherwise.
     bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+      return compare(LHS, RHS) < 0;
+    }
+
+    // Return negative, zero, or positive, if LHS is less than, equal to, or
+    // greater than RHS, respectively. A three-way result allows recursive
+    // comparisons to be more efficient.
+    int compare(const SCEV *LHS, const SCEV *RHS) const {
      // Fast-path: SCEVs are uniqued so we can do a quick equality check.
      if (LHS == RHS)
-        return false;
+        return 0;
 
      // Primarily, sort the SCEVs by their getSCEVType().
-      if (LHS->getSCEVType() != RHS->getSCEVType())
-        return LHS->getSCEVType() < RHS->getSCEVType();
+      unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+      if (LType != RType)
+        return (int)LType - (int)RType;
 
      // Aside from the getSCEVType() ordering, the particular ordering
      // isn't very important except that it's beneficial to be consistent,
      // so that (a + b) and (b + a) don't end up as different expressions.
-
-      // Sort SCEVUnknown values with some loose heuristics. TODO: This is
-      // not as complete as it could be.
-      if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) {
+      switch (LType) {
+      case scUnknown: {
+        const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
        const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
 
+        // Sort SCEVUnknown values with some loose heuristics.
TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + // Order pointer values after integer values. This helps SCEVExpander // form GEPs. - if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy()) - return false; - if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy()) - return true; + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; // Compare getValueID values. - if (LU->getValue()->getValueID() != RU->getValue()->getValueID()) - return LU->getValue()->getValueID() < RU->getValue()->getValueID(); + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; // Sort arguments by their position. - if (const Argument *LA = dyn_cast<Argument>(LU->getValue())) { - const Argument *RA = cast<Argument>(RU->getValue()); - return LA->getArgNo() < RA->getArgNo(); + if (const Argument *LA = dyn_cast<Argument>(LV)) { + const Argument *RA = cast<Argument>(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - // For instructions, compare their loop depth, and their opcode. - // This is pretty loose. - if (Instruction *LV = dyn_cast<Instruction>(LU->getValue())) { - Instruction *RV = cast<Instruction>(RU->getValue()); + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { + const Instruction *RInst = cast<Instruction>(RV); // Compare loop depths. - if (LI->getLoopDepth(LV->getParent()) != - LI->getLoopDepth(RV->getParent())) - return LI->getLoopDepth(LV->getParent()) < - LI->getLoopDepth(RV->getParent()); - - // Compare opcodes. - if (LV->getOpcode() != RV->getOpcode()) - return LV->getOpcode() < RV->getOpcode(); + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } // Compare the number of operands. - if (LV->getNumOperands() != RV->getNumOperands()) - return LV->getNumOperands() < RV->getNumOperands(); + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; } - return false; + return 0; } - // Compare constant values. - if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) { + case scConstant: { + const SCEVConstant *LC = cast<SCEVConstant>(LHS); const SCEVConstant *RC = cast<SCEVConstant>(RHS); - if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth()) - return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth(); - return LC->getValue()->getValue().ult(RC->getValue()->getValue()); + + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; } - // Compare addrec loop depths. 
- if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) { + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); - if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth()) - return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth(); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; + } + + return 0; } - // Lexicographically compare n-ary expressions. - if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) { + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); - for (unsigned i = 0, e = LC->getNumOperands(); i != e; ++i) { - if (i >= RC->getNumOperands()) - return false; - if (operator()(LC->getOperand(i), RC->getOperand(i))) - return true; - if (operator()(RC->getOperand(i), LC->getOperand(i))) - return false; + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; } - return LC->getNumOperands() < RC->getNumOperands(); + return (int)LNumOps - (int)RNumOps; } - // Lexicographically compare udiv expressions. - if (const SCEVUDivExpr *LC = dyn_cast<SCEVUDivExpr>(LHS)) { + case scUDivExpr: { + const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); - if (operator()(LC->getLHS(), RC->getLHS())) - return true; - if (operator()(RC->getLHS(), LC->getLHS())) - return false; - if (operator()(LC->getRHS(), RC->getRHS())) - return true; - if (operator()(RC->getRHS(), LC->getRHS())) - return false; - return false; + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); } - // Compare cast expressions by operand. - if (const SCEVCastExpr *LC = dyn_cast<SCEVCastExpr>(LHS)) { + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); - return operator()(LC->getOperand(), RC->getOperand()); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); + } + + default: + break; } llvm_unreachable("Unknown SCEV kind!"); - return false; + return 0; } }; } @@ -628,8 +704,9 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. 
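    // For example (illustrative): with Ops = {%x, 2}, the constant sorts
    // first, since scConstant precedes scUnknown in the SCEVTypes ordering,
    // so the pair becomes {2, %x}.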
-    if (SCEVComplexityCompare(LI)(Ops[1], Ops[0]))
-      std::swap(Ops[0], Ops[1]);
+    const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+    if (SCEVComplexityCompare(LI)(RHS, LHS))
+      std::swap(LHS, RHS);
     return;
   }
 
@@ -845,6 +922,13 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
       return getAddRecExpr(Operands, AddRec->getLoop());
   }
 
+  // As a special case, fold trunc(undef) to undef. We don't want to
+  // know too much about SCEVUnknowns, but this special case is handy
+  // and harmless.
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+    if (isa<UndefValue>(U->getValue()))
+      return getSCEV(UndefValue::get(Ty));
+
   // The cast wasn't folded; create an explicit cast node. We can reuse
   // the existing insert position since if we get here, we won't have
   // made any changes which would invalidate it.
@@ -1163,6 +1247,13 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
     return getAddRecExpr(Ops, AR->getLoop());
   }
 
+  // As a special case, fold anyext(undef) to undef. We don't want to
+  // know too much about SCEVUnknowns, but this special case is handy
+  // and harmless.
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+    if (isa<UndefValue>(U->getValue()))
+      return getSCEV(UndefValue::get(Ty));
+
   // If the expression is obviously signed, use the sext cast value.
   if (isa<SCEVSMaxExpr>(Op))
     return SExt;
@@ -1287,8 +1378,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   // If HasNSW is true and all the operands are non-negative, infer HasNUW.
   if (!HasNUW && HasNSW) {
     bool All = true;
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (!isKnownNonNegative(Ops[i])) {
+    for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+         E = Ops.end(); I != E; ++I)
+      if (!isKnownNonNegative(*I)) {
        All = false;
        break;
      }
@@ -1321,22 +1413,29 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
     if (Ops.size() == 1) return Ops[0];
   }
 
-  // Okay, check to see if the same value occurs in the operand list twice.  If
-  // so, merge them together into a multiply expression.  Since we sorted the
-  // list, these values are required to be adjacent.
+  // Okay, check to see if the same value occurs in the operand list more than
+  // once.  If so, merge them together into a multiply expression.  Since we
+  // sorted the list, these values are required to be adjacent.
   const Type *Ty = Ops[0]->getType();
-  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+  bool FoundMatch = false;
+  for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
     if (Ops[i] == Ops[i+1]) {  // X + Y + Y  -->  X + Y*2
-      // Found a match, merge the two values into a multiply, and add any
-      // remaining values to the result.
-      const SCEV *Two = getConstant(Ty, 2);
-      const SCEV *Mul = getMulExpr(Ops[i], Two);
-      if (Ops.size() == 2)
+      // Scan ahead to count how many equal operands there are.
+      unsigned Count = 2;
+      while (i+Count != e && Ops[i+Count] == Ops[i])
+        ++Count;
+      // Merge the values into a multiply.
+      const SCEV *Scale = getConstant(Ty, Count);
+      const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+      if (Ops.size() == Count)
        return Mul;
-      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
-      Ops.push_back(Mul);
-      return getAddExpr(Ops, HasNUW, HasNSW);
+      Ops[i] = Mul;
+      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+      --i; e -= Count - 1;
+      FoundMatch = true;
    }
+  if (FoundMatch)
+    return getAddExpr(Ops, HasNUW, HasNSW);
 
  // Check for truncates.
If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be @@ -1433,7 +1532,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; - for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(), + for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(), E = NewOps.end(); I != E; ++I) MulOpLists[M.find(*I)->second].push_back(*I); // Re-generate the operands list. @@ -1460,20 +1559,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]); for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { const SCEV *MulOpSCEV = Mul->getOperand(MulOp); + if (isa<SCEVConstant>(MulOpSCEV)) + continue; for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) - if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) { + if (MulOpSCEV == Ops[AddOp]) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) const SCEV *InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. - SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end()); - MulOps.erase(MulOps.begin()+MulOp); + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps); } const SCEV *One = getConstant(Ty, 1); - const SCEV *AddOne = getAddExpr(InnerMul, One); - const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]); + const SCEV *AddOne = getAddExpr(One, InnerMul); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -1500,15 +1602,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), - Mul->op_end()); - MulOps.erase(MulOps.begin()+MulOp); + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul1 = getMulExpr(MulOps); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), - OtherMul->op_end()); - MulOps.erase(MulOps.begin()+OMulOp); + OtherMul->op_begin()+OMulOp); + MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); InnerMul2 = getMulExpr(MulOps); } const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); @@ -1574,30 +1676,31 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // there are multiple AddRec's with the same loop induction variable being // added together. If so, we can fold them. 
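  // For example (illustrative): {1,+,2}<L> + {10,+,3}<L> folds operand by
  // operand to {11,+,5}<L>.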
for (unsigned OtherIdx = Idx+1; - OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx) - if (OtherIdx != Idx) { - const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); - if (AddRecLoop == OtherAddRec->getLoop()) { - // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} - SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(), - AddRec->op_end()); - for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { - if (i >= NewOps.size()) { - NewOps.append(OtherAddRec->op_begin()+i, - OtherAddRec->op_end()); - break; + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; + } + AddRecOps[i] = getAddExpr(AddRecOps[i], + OtherAddRec->getOperand(i)); + } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; } - NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i)); - } - const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop); - - if (Ops.size() == 2) return NewAddRec; - - Ops.erase(Ops.begin()+Idx); - Ops.erase(Ops.begin()+OtherIdx-1); - Ops.push_back(NewAddRec); - return getAddExpr(Ops); - } + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop); + return getAddExpr(Ops); } // Otherwise couldn't fold anything into this recurrence. Move onto the @@ -1633,17 +1736,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == - getEffectiveSCEVType(Ops[0]->getType()) && + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!"); #endif // If HasNSW is true and all the operands are non-negative, infer HasNUW. if (!HasNUW && HasNSW) { bool All = true; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (!isKnownNonNegative(Ops[i])) { + for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { All = false; break; } @@ -1740,8 +1844,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // they are loop invariant w.r.t. the recurrence. SmallVector<const SCEV *, 8> LIOps; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { + if (Ops[i]->isLoopInvariant(AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -1758,7 +1863,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. 
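      // For example (illustrative): 3 * {2,+,4}<L> becomes {6,+,12}<L>; each
      // operand of the recurrence is scaled by the loop-invariant factor.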
- const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(), + const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, HasNUW && AddRec->hasNoUnsignedWrap(), HasNSW && AddRec->hasNoSignedWrap()); @@ -1778,28 +1883,30 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // there are multiple AddRec's with the same loop induction variable being // multiplied together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; - OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx) - if (OtherIdx != Idx) { - const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); - if (AddRec->getLoop() == OtherAddRec->getLoop()) { - // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D} - const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; - const SCEV *NewStart = getMulExpr(F->getStart(), - G->getStart()); - const SCEV *B = F->getStepRecurrence(*this); - const SCEV *D = G->getStepRecurrence(*this); - const SCEV *NewStep = getAddExpr(getMulExpr(F, D), - getMulExpr(G, B), - getMulExpr(B, D)); - const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, - F->getLoop()); - if (Ops.size() == 2) return NewAddRec; - - Ops.erase(Ops.begin()+Idx); - Ops.erase(Ops.begin()+OtherIdx-1); - Ops.push_back(NewAddRec); - return getMulExpr(Ops); - } + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> --> + // {A*C,+,F*D + G*B + B*D}<L> + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; + const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); + const SCEV *B = F->getStepRecurrence(*this); + const SCEV *D = G->getStepRecurrence(*this); + const SCEV *NewStep = getAddExpr(getMulExpr(F, D), + getMulExpr(G, B), + getMulExpr(B, D)); + const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, + F->getLoop()); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + return getMulExpr(Ops); } // Otherwise couldn't fold anything into this recurrence. Move onto the @@ -1848,7 +1955,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // TODO: Generalize this to non-constants by using known-bits information. const Type *Ty = LHS->getType(); unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); - unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ; + unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. 
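    // For example (illustrative): an i32 udiv by 8 has LZ = 28, so
    // MaxShiftAmt = 32 - 28 - 1 = 3, i.e. exactly the right shift by 3.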
    if (!RHSC->getValue()->getValue().isPowerOf2())
@@ -1955,9 +2062,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
                                bool HasNUW, bool HasNSW) {
   if (Operands.size() == 1) return Operands[0];
 #ifndef NDEBUG
+  const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
-    assert(getEffectiveSCEVType(Operands[i]->getType()) ==
-           getEffectiveSCEVType(Operands[0]->getType()) &&
+    assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
            "SCEVAddRecExpr operand types don't match!");
 #endif
 
@@ -1975,8 +2082,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   // If HasNSW is true and all the operands are non-negative, infer HasNUW.
   if (!HasNUW && HasNSW) {
     bool All = true;
-    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
-      if (!isKnownNonNegative(Operands[i])) {
+    for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+         E = Operands.end(); I != E; ++I)
+      if (!isKnownNonNegative(*I)) {
        All = false;
        break;
      }
@@ -1986,9 +2094,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
   // Canonicalize nested AddRecs by nesting them in order of loop depth.
   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
     const Loop *NestedLoop = NestedAR->getLoop();
-    if (L->contains(NestedLoop->getHeader()) ?
+    if (L->contains(NestedLoop) ?
         (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
-        (!NestedLoop->contains(L->getHeader()) &&
+        (!NestedLoop->contains(L) &&
          DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                   NestedAR->op_end());
@@ -2055,9 +2163,9 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   assert(!Ops.empty() && "Cannot get empty smax!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
+  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
-           getEffectiveSCEVType(Ops[0]->getType()) &&
+    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
            "SCEVSMaxExpr operand types don't match!");
 #endif
@@ -2160,9 +2268,9 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
   assert(!Ops.empty() && "Cannot get empty umax!");
   if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
+  const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-    assert(getEffectiveSCEVType(Ops[i]->getType()) ==
-           getEffectiveSCEVType(Ops[0]->getType()) &&
+    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
            "SCEVUMaxExpr operand types don't match!");
 #endif
@@ -2326,8 +2434,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
   ID.AddInteger(scUnknown);
   ID.AddPointer(V);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V);
+  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+    assert(cast<SCEVUnknown>(S)->getValue() == V &&
+           "Stale SCEVUnknown in uniquing map!");
+    return S;
+  }
+  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+                                            FirstUnknown);
+  FirstUnknown = cast<SCEVUnknown>(S);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -2391,10 +2505,15 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
 
-  std::map<SCEVCallbackVH, const
SCEV *>::iterator I = Scalars.find(V); - if (I != Scalars.end()) return I->second; + ValueExprMapType::const_iterator I = ValueExprMap.find(V); + if (I != ValueExprMap.end()) return I->second; const SCEV *S = createSCEV(V); - Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + + // The process of creating a SCEV for V may have caused other SCEVs + // to have been created, so it's necessary to insert the new entry + // from scratch, rather than trying to remember the insert position + // above. + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); return S; } @@ -2428,6 +2547,10 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { /// const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS) { + // Fast path: X - X --> 0. + if (LHS == RHS) + return getConstant(LHS->getType(), 0); + // X - Y --> X + -Y return getAddExpr(LHS, getNegativeSCEV(RHS)); } @@ -2570,12 +2693,12 @@ PushDefUseChildren(Instruction *I, // Push the def-use children onto the Worklist stack. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) - Worklist.push_back(cast<Instruction>(UI)); + Worklist.push_back(cast<Instruction>(*UI)); } /// ForgetSymbolicValue - This looks up computed SCEV values for all /// instructions that depend on the given instruction and removes them from -/// the Scalars map if they reference SymName. This is used during PHI +/// the ValueExprMapType map if they reference SymName. This is used during PHI /// resolution. void ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { @@ -2588,9 +2711,9 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map<SCEVCallbackVH, const SCEV *>::iterator It = - Scalars.find(static_cast<Value *>(I)); - if (It != Scalars.end()) { + ValueExprMapType::iterator It = + ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { // Short-circuit the def-use traversal if the symbolic name // ceases to appear in expressions. if (It->second != SymName && !It->second->hasOperand(SymName)) @@ -2607,7 +2730,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { !isa<SCEVUnknown>(It->second) || (I != PN && It->second == SymName)) { ValuesAtScopes.erase(It->second); - Scalars.erase(It); + ValueExprMap.erase(It); } } @@ -2644,9 +2767,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. const SCEV *SymbolicName = getUnknown(PN); - assert(Scalars.find(PN) == Scalars.end() && + assert(ValueExprMap.find(PN) == ValueExprMap.end() && "PHI node already processed?"); - Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. @@ -2707,7 +2830,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. ForgetSymbolicName(PN, SymbolicName); - Scalars[SCEVCallbackVH(PN, this)] = PHISCEV; + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; return PHISCEV; } } @@ -2732,7 +2855,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // to be symbolic. 
We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. ForgetSymbolicName(PN, SymbolicName); - Scalars[SCEVCallbackVH(PN, this)] = PHISCEV; + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; return PHISCEV; } } @@ -2777,7 +2900,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { return getUnknown(GEP); const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); - for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()), + for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), E = GEP->op_end(); I != E; ++I) { Value *Index = *I; @@ -3200,12 +3323,42 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { Operator *U = cast<Operator>(V); switch (Opcode) { - case Instruction::Add: - return getAddExpr(getSCEV(U->getOperand(0)), - getSCEV(U->getOperand(1))); - case Instruction::Mul: - return getMulExpr(getSCEV(U->getOperand(0)), - getSCEV(U->getOperand(1))); + case Instruction::Add: { + // The simple thing to do would be to just call getSCEV on both operands + // and call getAddExpr with the result. However if we're looking at a + // bunch of things all added together, this can be quite inefficient, + // because it leads to N-1 getAddExpr calls for N ultimate operands. + // Instead, gather up all the operands and make a single getAddExpr call. + // LLVM IR canonical form means we need only traverse the left operands. + SmallVector<const SCEV *, 4> AddOps; + AddOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { + unsigned Opcode = Op->getValueID() - Value::InstructionVal; + if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + break; + U = cast<Operator>(Op); + const SCEV *Op1 = getSCEV(U->getOperand(1)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getNegativeSCEV(Op1)); + else + AddOps.push_back(Op1); + } + AddOps.push_back(getSCEV(U->getOperand(0))); + return getAddExpr(AddOps); + } + case Instruction::Mul: { + // See the Add code above. 
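+    // For example (illustrative): ((a*b)*c)*d gathers {d, c, b, a} into
+    // MulOps and issues a single getMulExpr call instead of three nested
+    // ones.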
+ SmallVector<const SCEV *, 4> MulOps; + MulOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); + Op->getValueID() == Instruction::Mul + Value::InstructionVal; + Op = U->getOperand(0)) { + U = cast<Operator>(Op); + MulOps.push_back(getSCEV(U->getOperand(1))); + } + MulOps.push_back(getSCEV(U->getOperand(0))); + return getMulExpr(MulOps); + } case Instruction::UDiv: return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); @@ -3467,7 +3620,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { const SCEV *LDiff = getMinusSCEV(LA, LS); const SCEV *RDiff = getMinusSCEV(RA, One); if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(LS, One), LDiff); + return getAddExpr(getUMaxExpr(One, LS), LDiff); } break; case ICmpInst::ICMP_EQ: @@ -3482,7 +3635,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { const SCEV *LDiff = getMinusSCEV(LA, One); const SCEV *RDiff = getMinusSCEV(RA, LS); if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(LS, One), LDiff); + return getAddExpr(getUMaxExpr(One, LS), LDiff); } break; default: @@ -3579,9 +3732,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map<SCEVCallbackVH, const SCEV *>::iterator It = - Scalars.find(static_cast<Value *>(I)); - if (It != Scalars.end()) { + ValueExprMapType::iterator It = + ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { // SCEVUnknown for a PHI either means that it has an unrecognized // structure, or it's a PHI that's in the progress of being computed // by createNodeForPHI. In the former case, additional loop trip @@ -3590,7 +3743,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // own when it gets to that point. if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) { ValuesAtScopes.erase(It->second); - Scalars.erase(It); + ValueExprMap.erase(It); } if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); @@ -3619,11 +3772,10 @@ void ScalarEvolution::forgetLoop(const Loop *L) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map<SCEVCallbackVH, const SCEV *>::iterator It = - Scalars.find(static_cast<Value *>(I)); - if (It != Scalars.end()) { + ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { ValuesAtScopes.erase(It->second); - Scalars.erase(It); + ValueExprMap.erase(It); if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); } @@ -3648,35 +3800,14 @@ void ScalarEvolution::forgetValue(Value *V) { I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - std::map<SCEVCallbackVH, const SCEV *>::iterator It = - Scalars.find(static_cast<Value *>(I)); - if (It != Scalars.end()) { + ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { ValuesAtScopes.erase(It->second); - Scalars.erase(It); + ValueExprMap.erase(It); if (PHINode *PN = dyn_cast<PHINode>(I)) ConstantEvolutionLoopExitValue.erase(PN); } - // If there's a SCEVUnknown tying this value into the SCEV - // space, remove it from the folding set map. The SCEVUnknown - // object and any other SCEV objects which reference it - // (transitively) remain allocated, effectively leaked until - // the underlying BumpPtrAllocator is freed. - // - // This permits SCEV pointers to be used as keys in maps - // such as the ValuesAtScopes map. 
- FoldingSetNodeID ID; - ID.AddInteger(scUnknown); - ID.AddPointer(I); - void *IP; - if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { - UniqueSCEVs.RemoveNode(S); - - // This isn't necessary, but we might as well remove the - // value from the ValuesAtScopes map too. - ValuesAtScopes.erase(S); - } - PushDefUseChildren(I, Worklist); } } @@ -3816,14 +3947,13 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, else MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); } else { - // Both conditions must be true for the loop to exit. + // Both conditions must be true at the same time for the loop to exit. + // For now, be conservative. assert(L->contains(FBB) && "Loop block has no successor in loop!"); - if (BTI0.Exact != getCouldNotCompute() && - BTI1.Exact != getCouldNotCompute()) - BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max != getCouldNotCompute() && - BTI1.Max != getCouldNotCompute()) - MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); + if (BTI0.Max == BTI1.Max) + MaxBECount = BTI0.Max; + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; } return BackedgeTakenInfo(BECount, MaxBECount); @@ -3851,14 +3981,13 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, else MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); } else { - // Both conditions must be false for the loop to exit. + // Both conditions must be false at the same time for the loop to exit. + // For now, be conservative. assert(L->contains(TBB) && "Loop block has no successor in loop!"); - if (BTI0.Exact != getCouldNotCompute() && - BTI1.Exact != getCouldNotCompute()) - BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max != getCouldNotCompute() && - BTI1.Max != getCouldNotCompute()) - MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max); + if (BTI0.Max == BTI1.Max) + MaxBECount = BTI0.Max; + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; } return BackedgeTakenInfo(BECount, MaxBECount); @@ -4203,7 +4332,7 @@ Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { - std::map<PHINode*, Constant*>::iterator I = + std::map<PHINode*, Constant*>::const_iterator I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; @@ -5185,7 +5314,8 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, LoopContinuePredicate->isUnconditional()) return false; - return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS, + return isImpliedCond(Pred, LHS, RHS, + LoopContinuePredicate->getCondition(), LoopContinuePredicate->getSuccessor(0) != L->getHeader()); } @@ -5214,7 +5344,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, LoopEntryPredicate->isUnconditional()) continue; - if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, + if (isImpliedCond(Pred, LHS, RHS, + LoopEntryPredicate->getCondition(), LoopEntryPredicate->getSuccessor(0) != Pair.second)) return true; } @@ -5224,24 +5355,24 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, /// isImpliedCond - Test whether the condition described by Pred, LHS, /// and RHS is true whenever the given Cond value evaluates to true. -bool ScalarEvolution::isImpliedCond(Value *CondValue, - ICmpInst::Predicate Pred, +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + Value *FoundCondValue, bool Inverse) { // Recursively handle And and Or conditions. 
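  // For example (illustrative): if the known condition is "a && b" taken
  // true, either operand alone may already imply the goal; with Inverse set,
  // an "a || b" known false lets us assume both operands are false.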
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || - isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || - isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); } } - ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue); + ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); if (!ICI) return false; // Bail if the ICmp's operands' types are wider than the needed type @@ -5658,20 +5789,19 @@ void ScalarEvolution::SCEVCallbackVH::deleted() { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast<PHINode>(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); - SE->Scalars.erase(getValPtr()); + SE->ValueExprMap.erase(getValPtr()); // this now dangles! } -void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { +void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); // Forget all the expressions associated with users of the old value, // so that future queries will recompute the expressions using the new // value. + Value *Old = getValPtr(); SmallVector<User *, 16> Worklist; SmallPtrSet<User *, 8> Visited; - Value *Old = getValPtr(); - bool DeleteOld = false; for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); UI != UE; ++UI) Worklist.push_back(*UI); @@ -5679,27 +5809,22 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { User *U = Worklist.pop_back_val(); // Deleting the Old value will cause this to dangle. Postpone // that until everything else is done. - if (U == Old) { - DeleteOld = true; + if (U == Old) continue; - } if (!Visited.insert(U)) continue; if (PHINode *PN = dyn_cast<PHINode>(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); - SE->Scalars.erase(U); + SE->ValueExprMap.erase(U); for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); UI != UE; ++UI) Worklist.push_back(*UI); } - // Delete the Old value if it (indirectly) references itself. - if (DeleteOld) { - if (PHINode *PN = dyn_cast<PHINode>(Old)) - SE->ConstantEvolutionLoopExitValue.erase(PN); - SE->Scalars.erase(Old); - // this now dangles! - } - // this may dangle! + // Delete the Old value. + if (PHINode *PN = dyn_cast<PHINode>(Old)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(Old); + // this now dangles! 
} ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) @@ -5710,7 +5835,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(&ID) { + : FunctionPass(ID), FirstUnknown(0) { } bool ScalarEvolution::runOnFunction(Function &F) { @@ -5722,7 +5847,13 @@ bool ScalarEvolution::runOnFunction(Function &F) { } void ScalarEvolution::releaseMemory() { - Scalars.clear(); + // Iterate through all the SCEVUnknown instances and call their + // destructors, so that they release their references to their values. + for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) + U->~SCEVUnknown(); + FirstUnknown = 0; + + ValueExprMap.clear(); BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 58711b8be59e8..93b2a8b06fbe9 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -34,14 +34,14 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {} + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {} /// getAdjustedAnalysisPointer - This method is used when a pass implements /// an analysis interface through multiple inheritance. If needed, it /// should override this to adjust the this pointer as needed for the /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) return (AliasAnalysis*)this; return this; } @@ -58,11 +58,8 @@ namespace { // Register this pass... char ScalarEvolutionAliasAnalysis::ID = 0; -static RegisterPass<ScalarEvolutionAliasAnalysis> -X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true); - -// Declare that we implement the AliasAnalysis interface -static RegisterAnalysisGroup<AliasAnalysis> Y(X); +INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true, false); FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() { return new ScalarEvolutionAliasAnalysis(); @@ -158,8 +155,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize, Value *AO = GetBaseValue(AS); Value *BO = GetBaseValue(BS); if ((AO && AO != A) || (BO && BO != B)) - if (alias(AO ? AO : A, AO ? ~0u : ASize, - BO ? BO : B, BO ? ~0u : BSize) == NoAlias) + if (alias(AO ? AO : A, AO ? UnknownSize : ASize, + BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias) return NoAlias; // Forward the query to the next analysis. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index d4a4b26e25ec4..66a06aeac43ca 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -647,6 +647,11 @@ public: bool operator()(std::pair<const Loop *, const SCEV *> LHS, std::pair<const Loop *, const SCEV *> RHS) const { + // Keep pointer operands sorted at the end. + if (LHS.second->getType()->isPointerTy() != + RHS.second->getType()->isPointerTy()) + return LHS.second->getType()->isPointerTy(); + // Compare loops with PickMostRelevantLoop. 
      if (LHS.first != RHS.first)
        return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
@@ -699,8 +704,15 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
       // The running sum expression is a pointer. Try to form a getelementptr
       // at this level with that as the base.
       SmallVector<const SCEV *, 4> NewOps;
-      for (; I != E && I->first == CurLoop; ++I)
-        NewOps.push_back(I->second);
+      for (; I != E && I->first == CurLoop; ++I) {
+        // If the operand is a SCEVUnknown and not an instruction, peek
+        // through it, to enable more of it to be folded into the GEP.
+        const SCEV *X = I->second;
+        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+          if (!isa<Instruction>(U->getValue()))
+            X = SE.getSCEV(U->getValue());
+        NewOps.push_back(X);
+      }
       Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
     } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
       // The running sum is an integer, and there's a pointer at this level.
@@ -1047,9 +1059,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   // First check for an existing canonical IV in a suitable type.
   PHINode *CanonicalIV = 0;
   if (PHINode *PN = L->getCanonicalInductionVariable())
-    if (SE.isSCEVable(PN->getType()) &&
-        SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() &&
-        SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
       CanonicalIV = PN;
 
   // Rewrite an AddRec in terms of the canonical induction variable, if
@@ -1102,21 +1112,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
                                       SE.getUnknown(expand(Rest))));
   }
 
-  // {0,+,1} --> Insert a canonical induction variable into the loop!
-  if (S->isAffine() && S->getOperand(1)->isOne()) {
-    // If there's a canonical IV, just use it.
-    if (CanonicalIV) {
-      assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
-             "IVs with types different from the canonical IV should "
-             "already have been handled!");
-      return CanonicalIV;
-    }
-
+  // If we don't yet have a canonical IV, create one.
+  if (!CanonicalIV) {
     // Create and insert the PHI node for the induction variable in the
     // specified loop.
     BasicBlock *Header = L->getHeader();
-    PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
-    rememberInstruction(PN);
+    CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin());
+    rememberInstruction(CanonicalIV);
 
     Constant *One = ConstantInt::get(Ty, 1);
     for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
@@ -1125,40 +1127,45 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
-        Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
-                                                     HP->getTerminator());
+        Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+                                                     "indvar.next",
+                                                     HP->getTerminator());
        rememberInstruction(Add);
-        PN->addIncoming(Add, HP);
+        CanonicalIV->addIncoming(Add, HP);
      } else {
-        PN->addIncoming(Constant::getNullValue(Ty), HP);
+        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }
 
+  // {0,+,1} --> Insert a canonical induction variable into the loop!
+  if (S->isAffine() && S->getOperand(1)->isOne()) {
+    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+           "IVs with types different from the canonical IV should "
+           "already have been handled!");
+    return CanonicalIV;
+  }
+
   // {0,+,F} --> {0,+,1} * F
-  // Get the canonical induction variable I for this loop.
- Value *I = CanonicalIV ? - CanonicalIV : - getOrInsertCanonicalInductionVariable(L, Ty); // If this is a simple linear addrec, emit it now as a special case. if (S->isAffine()) // {0,+,F} --> i*F return expand(SE.getTruncateOrNoop( - SE.getMulExpr(SE.getUnknown(I), + SE.getMulExpr(SE.getUnknown(CanonicalIV), SE.getNoopOrAnyExtend(S->getOperand(1), - I->getType())), + CanonicalIV->getType())), Ty)); // If this is a chain of recurrences, turn it into a closed form, using the // folders, then expandCodeFor the closed form. This allows the folders to // simplify the expression without having to build a bunch of special code // into this folder. - const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. // Promote S up to the canonical IV type, if the cast is foldable. const SCEV *NewS = S; - const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType()); + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); if (isa<SCEVAddRecExpr>(Ext)) NewS = Ext; @@ -1337,16 +1344,21 @@ void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { /// canonical induction variable of the specified type for the specified /// loop (inserting one if there is none). A canonical induction variable /// starts at zero and steps by one on each iteration. -Value * +PHINode * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); + + // Build a SCEV for {0,+,1}<L>. const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), SE.getConstant(Ty, 1), L); + + // Emit code for it. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); + PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); if (SaveInsertBB) restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return V; } diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 563fd2fa96e2e..ac36cef89ebb5 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -26,7 +26,7 @@ using namespace llvm; /// post-inc value when we cannot) or it can end up adding extra live-ranges to /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we /// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, +static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, const Loop *L, DominatorTree *DT) { // If the user is in the loop, use the preinc value. if (L->contains(User)) return false; @@ -45,20 +45,17 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, // their uses occur in the predecessor block, not the block the PHI lives in) // should still use the post-inc value. Check for this case now. PHINode *PN = dyn_cast<PHINode>(User); - if (!PN) return false; // not a phi, not dominated by latch block. + if (!PN || !Operand) return false; // not a phi, not dominated by latch block. - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the + // Look at all of the uses of Operand by the PHI node. If any use corresponds + // to a block that is not dominated by the latch block, give up and use the // preincremented value. 
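// For example (illustrative IR), in:
//   loop:
//     %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
//     %iv.next = add i64 %iv, 1
//     %done = icmp eq i64 %iv.next, %n
//     br i1 %done, label %exit, label %loop
//   exit:
//     %last = phi i64 [ %iv, %loop ]
// the use of %iv by %last occurs, for this purpose, in the predecessor
// block %loop, which is (trivially) dominated by the latch, so it is
// safe to rewrite %last in terms of the post-incremented %iv.next.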
- unsigned NumUses = 0; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } + if (PN->getIncomingValue(i) == Operand && + !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; - // Okay, all uses of IV by PN are in predecessor blocks that really are + // Okay, all uses of Operand by PN are in predecessor blocks that really are // dominated by the latch block. Use the post-incremented value. return true; } @@ -72,6 +69,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, DominatorTree &DT) { if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) return S; + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { const SCEV *O = X->getOperand(); const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, @@ -85,9 +83,69 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, } return S; } + + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. + SmallVector<const SCEV *, 8> Operands; + const Loop *L = AR->getLoop(); + // The addrec conceptually uses its operands at loop entry. + Instruction *LUser = L->getHeader()->begin(); + // Transform each operand. + for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); + Operands.push_back(N); + } + const SCEV *Result = SE.getAddRecExpr(Operands, L); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); + Loops.insert(L); + } +#if 0 + // This assert is conceptually correct, but ScalarEvolution currently + // sometimes fails to canonicalize two equal SCEVs to exactly the same + // form. It's possibly a pessimization when this happens, but it isn't a + // correctness problem, so disable this assert for now. + assert(S == TransformForPostIncUse(Denormalize, Result, + User, OperandValToReplace, + Loops, SE, DT) && + "SCEV normalization is not invertible!"); +#endif + break; + case Normalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); + } +#if 0 + // See the comment on the assert above. + assert(S == TransformForPostIncUse(Denormalize, Result, + User, OperandValToReplace, + Loops, SE, DT) && + "SCEV normalization is not invertible!"); +#endif + break; + case Denormalize: + if (Loops.count(L)) + Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE); + break; + } + return Result; + } + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { SmallVector<const SCEV *, 8> Operands; bool Changed = false; + // Transform each operand. for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); I != E; ++I) { const SCEV *O = *I; @@ -96,37 +154,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, Changed |= N != O; Operands.push_back(N); } - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - // An addrec. This is the interesting part. 
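// For reference, with a unit step: Normalize maps a post-increment
// user's {1,+,1}<L> to {0,+,1}<L> by subtracting the (transformed) step,
// and Denormalize maps {0,+,1}<L> back to {1,+,1}<L>; the two transforms
// are intended to be exact inverses (see the disabled asserts above).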
- const Loop *L = AR->getLoop(); - const SCEV *Result = SE.getAddRecExpr(Operands, L); - switch (Kind) { - default: llvm_unreachable("Unexpected transform name!"); - case NormalizeAutodetect: - if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace)) - if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { - const SCEV *TransformedStep = - TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), - User, OperandValToReplace, Loops, SE, DT); - Result = SE.getMinusSCEV(Result, TransformedStep); - Loops.insert(L); - } - break; - case Normalize: - if (Loops.count(L)) { - const SCEV *TransformedStep = - TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), - User, OperandValToReplace, Loops, SE, DT); - Result = SE.getMinusSCEV(Result, TransformedStep); - } - break; - case Denormalize: - if (Loops.count(L)) - Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE)); - break; - } - return Result; - } + // If any operand actually changed, return a transformed result. if (Changed) switch (S->getSCEVType()) { case scAddExpr: return SE.getAddExpr(Operands); @@ -137,6 +165,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, } return S; } + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { const SCEV *LO = X->getLHS(); const SCEV *RO = X->getRHS(); @@ -148,6 +177,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, return SE.getUDivExpr(LN, RN); return S; } + llvm_unreachable("Unexpected SCEV kind!"); return 0; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp new file mode 100644 index 0000000000000..bbfdcec3f9b4b --- /dev/null +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -0,0 +1,191 @@ +//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TypeBasedAliasAnalysis pass, which implements +// metadata-based TBAA. +// +// In LLVM IR, memory does not have types, so LLVM's own type system is not +// suitable for doing TBAA. Instead, metadata is added to the IR to describe +// a type system of a higher level language. +// +// This pass is language-independent. The type system is encoded in +// metadata. This allows this pass to support typical C and C++ TBAA, but +// it can also support custom aliasing behavior for other languages. +// +// This is a work-in-progress. It doesn't work yet, and the metadata +// format isn't stable. +// +// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to +// be extended. +// TODO: AA chaining +// TODO: struct fields +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + /// TBAANode - This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAANode { + const MDNode *Node; + + public: + TBAANode() : Node(0) {} + explicit TBAANode(MDNode *N) : Node(N) {} + + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } + + /// getParent - Get this TBAANode's Alias DAG parent. 
+ TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast<MDNode>(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. + return TBAANode(P); + } + + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); + if (!CI) + return false; + // TODO: Think about the encoding. + return CI->isOne(); + } + }; +} + +namespace { + /// TypeBasedAliasAnalysis - This is a simple alias analysis + /// implementation that uses type-based alias metadata to answer queries. + class TypeBasedAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + TypeBasedAliasAnalysis() : ImmutablePass(ID) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + virtual bool pointsToConstantMemory(const Value *P); + }; +} // End of anonymous namespace + +// Register this pass... +char TypeBasedAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", + "Type-Based Alias Analysis", false, true, false); + +ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { + return new TypeBasedAliasAnalysis(); +} + +void +TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize, + const Value *B, unsigned BSize) { + // Currently, metadata can only be attached to Instructions. + const Instruction *AI = dyn_cast<Instruction>(A); + if (!AI) return MayAlias; + const Instruction *BI = dyn_cast<Instruction>(B); + if (!BI) return MayAlias; + + // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must + // be conservative. + MDNode *AM = + AI->getMetadata(AI->getParent()->getParent()->getParent() + ->getMDKindID("tbaa")); + if (!AM) return MayAlias; + MDNode *BM = + BI->getMetadata(BI->getParent()->getParent()->getParent() + ->getMDKindID("tbaa")); + if (!BM) return MayAlias; + + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the DAG from A to see if we reach B. + for (TBAANode T(AM); ; ) { + if (T.getNode() == BM) + // B is an ancestor of A. + return MayAlias; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the DAG from B to see if we reach A. + for (TBAANode T(BM); ; ) { + if (T.getNode() == AM) + // A is an ancestor of B. + return MayAlias; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have the same root, then we've proved there's no alias.
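// For example (the metadata format is not yet stable), with nodes like:
//   !0 = metadata !{metadata !"omnipotent char"}        ; a root
//   !1 = metadata !{metadata !"int",   metadata !0}
//   !2 = metadata !{metadata !"float", metadata !0}
// neither climb from !1 nor from !2 reaches the other node, and both
// stop at the common root !0, so "int" and "float" accesses are NoAlias.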
+ if (RootA.getNode() == RootB.getNode()) + return NoAlias; + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + return MayAlias; +} + +bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) { + // Currently, metadata can only be attached to Instructions. + const Instruction *I = dyn_cast<Instruction>(P); + if (!I) return false; + + MDNode *M = + I->getMetadata(I->getParent()->getParent()->getParent() + ->getMDKindID("tbaa")); + if (!M) return false; + + // If this is an "immutable" type, we can assume the pointer is pointing + // to constant memory. + return TBAANode(M).TypeIsImmutable(); +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index b4c9884a20ed8..181c9b01980c3 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -880,19 +880,20 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, } Value *Mul0 = NULL; - Value *Mul1 = NULL; - bool M0 = ComputeMultiple(Op0, Base, Mul0, - LookThroughSExt, Depth+1); - bool M1 = ComputeMultiple(Op1, Base, Mul1, - LookThroughSExt, Depth+1); - - if (M0) { - if (isa<Constant>(Op1) && isa<Constant>(Mul0)) { - // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) - Multiple = ConstantExpr::getMul(cast<Constant>(Mul0), - cast<Constant>(Op1)); - return true; - } + if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { + if (Constant *Op1C = dyn_cast<Constant>(Op1)) + if (Constant *MulC = dyn_cast<Constant>(Mul0)) { + if (Op1C->getType()->getPrimitiveSizeInBits() < + MulC->getType()->getPrimitiveSizeInBits()) + Op1C = ConstantExpr::getZExt(Op1C, MulC->getType()); + if (Op1C->getType()->getPrimitiveSizeInBits() > + MulC->getType()->getPrimitiveSizeInBits()) + MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); + + // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) + Multiple = ConstantExpr::getMul(MulC, Op1C); + return true; + } if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0)) if (Mul0CI->getValue() == 1) { @@ -902,13 +903,21 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, } } - if (M1) { - if (isa<Constant>(Op0) && isa<Constant>(Mul1)) { - // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) - Multiple = ConstantExpr::getMul(cast<Constant>(Mul1), - cast<Constant>(Op0)); - return true; - } + Value *Mul1 = NULL; + if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { + if (Constant *Op0C = dyn_cast<Constant>(Op0)) + if (Constant *MulC = dyn_cast<Constant>(Mul1)) { + if (Op0C->getType()->getPrimitiveSizeInBits() < + MulC->getType()->getPrimitiveSizeInBits()) + Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); + if (Op0C->getType()->getPrimitiveSizeInBits() > + MulC->getType()->getPrimitiveSizeInBits()) + MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); + + // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) + Multiple = ConstantExpr::getMul(MulC, Op0C); + return true; + } if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1)) if (Mul1CI->getValue() == 1) { @@ -973,195 +982,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return false; } - -/// GetLinearExpression - Analyze the specified value as a linear expression: -/// "A*V + B", where A and B are constant integers. Return the scale and offset -/// values as APInts and return V as a Value*. The incoming Value is known to -/// have IntegerType. Note that this looks through extends, so the high bits -/// may not be represented in the result. 
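// For example, analyzing "sext i16 (4*%x + 12) to i32" (pseudo-IR) gives
// back %x with Scale = 4 and Offset = 12; the sext has been looked
// through, so only the low 16 bits of "Scale*%x + Offset" are meaningful.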
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, - const TargetData *TD, unsigned Depth) { - assert(V->getType()->isIntegerTy() && "Not an integer value"); - - // Limit our recursion depth. - if (Depth == 6) { - Scale = 1; - Offset = 0; - return V; - } - - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { - if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { - switch (BOp->getOpcode()) { - default: break; - case Instruction::Or: - // X|C == X+C if all the bits in C are unset in X. Otherwise we can't - // analyze it. - if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD)) - break; - // FALL THROUGH. - case Instruction::Add: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1); - Offset += RHSC->getValue(); - return V; - case Instruction::Mul: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1); - Offset *= RHSC->getValue(); - Scale *= RHSC->getValue(); - return V; - case Instruction::Shl: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1); - Offset <<= RHSC->getValue().getLimitedValue(); - Scale <<= RHSC->getValue().getLimitedValue(); - return V; - } - } - } - - // Since clients don't care about the high bits of the value, just scales and - // offsets, we can look through extensions. - if (isa<SExtInst>(V) || isa<ZExtInst>(V)) { - Value *CastOp = cast<CastInst>(V)->getOperand(0); - unsigned OldWidth = Scale.getBitWidth(); - unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); - Scale.trunc(SmallWidth); - Offset.trunc(SmallWidth); - Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD, Depth+1); - Scale.zext(OldWidth); - Offset.zext(OldWidth); - return Result; - } - - Scale = 1; - Offset = 0; - return V; -} - -/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it -/// into a base pointer with a constant offset and a number of scaled symbolic -/// offsets. -/// -/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in -/// the VarIndices vector) are Value*'s that are known to be scaled by the -/// specified amount, but which may have other unrepresented high bits. As such, -/// the gep cannot necessarily be reconstructed from its decomposed form. -/// -/// When TargetData is around, this function is capable of analyzing everything -/// that Value::getUnderlyingObject() can look through. When not, it just looks -/// through pointer casts. -/// -const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, - SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices, - const TargetData *TD) { - // Limit recursion depth to limit compile time in crazy cases. - unsigned MaxLookup = 6; - - BaseOffs = 0; - do { - // See if this is a bitcast or GEP. - const Operator *Op = dyn_cast<Operator>(V); - if (Op == 0) { - // The only non-operator case we can handle are GlobalAliases. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (!GA->mayBeOverridden()) { - V = GA->getAliasee(); - continue; - } - } - return V; - } - - if (Op->getOpcode() == Instruction::BitCast) { - V = Op->getOperand(0); - continue; - } - - const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); - if (GEPOp == 0) - return V; - - // Don't attempt to analyze GEPs over unsized objects. - if (!cast<PointerType>(GEPOp->getOperand(0)->getType()) - ->getElementType()->isSized()) - return V; - - // If we are lacking TargetData information, we can't compute the offets of - // elements computed by GEPs. 
However, we can handle bitcast equivalent - // GEPs. - if (!TD) { - if (!GEPOp->hasAllZeroIndices()) - return V; - V = GEPOp->getOperand(0); - continue; - } - - // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. - gep_type_iterator GTI = gep_type_begin(GEPOp); - for (User::const_op_iterator I = GEPOp->op_begin()+1, - E = GEPOp->op_end(); I != E; ++I) { - Value *Index = *I; - // Compute the (potentially symbolic) offset in bytes for this index. - if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { - // For a struct, add the member offset. - unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - if (FieldNo == 0) continue; - - BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); - continue; - } - - // For an array/pointer, add the element offset, explicitly scaled. - if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { - if (CIdx->isZero()) continue; - BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); - continue; - } - - uint64_t Scale = TD->getTypeAllocSize(*GTI); - - // Use GetLinearExpression to decompose the index into a C1*V+C2 form. - unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth(); - APInt IndexScale(Width, 0), IndexOffset(Width, 0); - Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD, 0); - - // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. - // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. - BaseOffs += IndexOffset.getZExtValue()*Scale; - Scale *= IndexScale.getZExtValue(); - - - // If we already had an occurrance of this index variable, merge this - // scale into it. For example, we want to handle: - // A[x][x] -> x*16 + x*4 -> x*20 - // This also ensures that 'x' only appears in the index list once. - for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { - if (VarIndices[i].first == Index) { - Scale += VarIndices[i].second; - VarIndices.erase(VarIndices.begin()+i); - break; - } - } - - // Make sure that we have a scale that makes sense for this target's - // pointer size. - if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { - Scale <<= ShiftBits; - Scale >>= ShiftBits; - } - - if (Scale) - VarIndices.push_back(std::make_pair(Index, Scale)); - } - - // Analyze the base pointer next. - V = GEPOp->getOperand(0); - } while (--MaxLookup); - - // If the chain of expressions is too deep, just return early. - return V; -} - - // This is the recursive version of BuildSubAggregate. It takes a few different // arguments. Idxs is the index within the nested struct From that we are // looking at now (which is of type IndexedType). 
IdxSkip is the number of diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index f4c0e50fd94df..032753a3b2c61 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -493,6 +493,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); KEYWORD(linker_private_weak); + KEYWORD(linker_private_weak_def_auto); KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); @@ -572,7 +573,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); - KEYWORD(union); KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 221b994db55fa..f21a065473b62 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -199,6 +199,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_private: // OptionalLinkage case lltok::kw_linker_private: // OptionalLinkage case lltok::kw_linker_private_weak: // OptionalLinkage + case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage case lltok::kw_internal: // OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage @@ -517,11 +518,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) { if (Result) return false; // Otherwise, create MDNode forward reference. - - // FIXME: This is not unique enough! - std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID); - Value *V = MDString::get(Context, FwdRefName); - MDNode *FwdNode = MDNode::get(Context, &V, 1); + MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0); ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); if (NumberedMetadata.size() <= MID) @@ -543,27 +540,20 @@ bool LLParser::ParseNamedMetadata() { ParseToken(lltok::lbrace, "Expected '{' here")) return true; - SmallVector<MDNode *, 8> Elts; + NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name); if (Lex.getKind() != lltok::rbrace) do { - // Null is a special case since it is typeless. - if (EatIfPresent(lltok::kw_null)) { - Elts.push_back(0); - continue; - } - if (ParseToken(lltok::exclaim, "Expected '!' 
here")) return true; MDNode *N = 0; if (ParseMDNodeID(N)) return true; - Elts.push_back(N); + NMD->addOperand(N); } while (EatIfPresent(lltok::comma)); if (ParseToken(lltok::rbrace, "expected end of metadata node")) return true; - NamedMDNode::Create(Context, Name, Elts.data(), Elts.size(), M); return false; } @@ -592,7 +582,9 @@ bool LLParser::ParseStandaloneMetadata() { std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator FI = ForwardRefMDNodes.find(MetadataID); if (FI != ForwardRefMDNodes.end()) { - FI->second.first->replaceAllUsesWith(Init); + MDNode *Temp = FI->second.first; + Temp->replaceAllUsesWith(Init); + MDNode::deleteTemporary(Temp); ForwardRefMDNodes.erase(FI); assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work"); @@ -632,7 +624,8 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::PrivateLinkage && Linkage != GlobalValue::LinkerPrivateLinkage && - Linkage != GlobalValue::LinkerPrivateWeakLinkage) + Linkage != GlobalValue::LinkerPrivateWeakLinkage && + Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -1017,6 +1010,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { /// ::= 'private' /// ::= 'linker_private' /// ::= 'linker_private_weak' +/// ::= 'linker_private_weak_def_auto' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' @@ -1038,6 +1032,9 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_linker_private_weak: Res = GlobalValue::LinkerPrivateWeakLinkage; break; + case lltok::kw_linker_private_weak_def_auto: + Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage; + break; case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; @@ -1120,29 +1117,44 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { /// ParseInstructionMetadata /// ::= !dbg !42 (',' !dbg !57)* -bool LLParser::ParseInstructionMetadata(Instruction *Inst) { +bool LLParser::ParseInstructionMetadata(Instruction *Inst, + PerFunctionState *PFS) { do { if (Lex.getKind() != lltok::MetadataVar) return TokError("expected metadata after comma"); std::string Name = Lex.getStrVal(); + unsigned MDK = M->getMDKindID(Name.c_str()); Lex.Lex(); MDNode *Node; unsigned NodeID; SMLoc Loc = Lex.getLoc(); - if (ParseToken(lltok::exclaim, "expected '!' here") || - ParseMDNodeID(Node, NodeID)) + + if (ParseToken(lltok::exclaim, "expected '!' here")) return true; - unsigned MDK = M->getMDKindID(Name.c_str()); - if (Node) { - // If we got the node, add it to the instruction. - Inst->setMetadata(MDK, Node); + // This code is similar to that of ParseMetadataValue, however it needs to + // have special-case code for a forward reference; see the comments on + // ForwardRefInstMetadata for details. Also, MDStrings are not supported + // at the top level here. + if (Lex.getKind() == lltok::lbrace) { + ValID ID; + if (ParseMetadataListValue(ID, PFS)) + return true; + assert(ID.Kind == ValID::t_MDNode); + Inst->setMetadata(MDK, ID.MDNodeVal); } else { - MDRef R = { Loc, MDK, NodeID }; - // Otherwise, remember that this should be resolved later. - ForwardRefInstMetadata[Inst].push_back(R); + if (ParseMDNodeID(Node, NodeID)) + return true; + if (Node) { + // If we got the node, add it to the instruction. 
+ Inst->setMetadata(MDK, Node); + } else { + MDRef R = { Loc, MDK, NodeID }; + // Otherwise, remember that this should be resolved later. + ForwardRefInstMetadata[Inst].push_back(R); + } } // If this is the end of the list, we're done. @@ -1161,6 +1173,8 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { if (ParseUInt32(Alignment)) return true; if (!isPowerOf2_32(Alignment)) return Error(AlignLoc, "alignment is not a power of two"); + if (Alignment > Value::MaximumAlignment) + return Error(AlignLoc, "huge alignments are not supported yet"); return false; } @@ -1183,6 +1197,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, if (Lex.getKind() != lltok::kw_align) return Error(Lex.getLoc(), "expected metadata or 'align'"); + LocTy AlignLoc = Lex.getLoc(); if (ParseOptionalAlignment(Alignment)) return true; } @@ -1344,11 +1359,6 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (ParseStructType(Result, false)) return true; break; - case lltok::kw_union: - // TypeRec ::= 'union' '{' ... '}' - if (ParseUnionType(Result)) - return true; - break; case lltok::lsquare: // TypeRec ::= '[' ... ']' Lex.Lex(); // eat the lsquare. @@ -1658,38 +1668,6 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { return false; } -/// ParseUnionType -/// TypeRec -/// ::= 'union' '{' TypeRec (',' TypeRec)* '}' -bool LLParser::ParseUnionType(PATypeHolder &Result) { - assert(Lex.getKind() == lltok::kw_union); - Lex.Lex(); // Consume the 'union' - - if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true; - - SmallVector<PATypeHolder, 8> ParamsList; - do { - LocTy EltTyLoc = Lex.getLoc(); - if (ParseTypeRec(Result)) return true; - ParamsList.push_back(Result); - - if (Result->isVoidTy()) - return Error(EltTyLoc, "union element can not have void type"); - if (!UnionType::isValidElementType(Result)) - return Error(EltTyLoc, "invalid element type for union"); - - } while (EatIfPresent(lltok::comma)) ; - - if (ParseToken(lltok::rbrace, "expected '}' at end of union")) - return true; - - SmallVector<const Type*, 8> ParamsListTy; - for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) - ParamsListTy.push_back(ParamsList[i].get()); - Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size())); - return false; -} - /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. /// TypeRec @@ -2504,6 +2482,20 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) { return false; } +bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) { + assert(Lex.getKind() == lltok::lbrace); + Lex.Lex(); + + SmallVector<Value*, 16> Elts; + if (ParseMDNodeVector(Elts, PFS) || + ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size()); + ID.Kind = ValID::t_MDNode; + return false; +} + /// ParseMetadataValue /// ::= !42 /// ::= !{...} @@ -2514,16 +2506,8 @@ bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) { // MDNode: // !{ ... 
} - if (EatIfPresent(lltok::lbrace)) { - SmallVector<Value*, 16> Elts; - if (ParseMDNodeVector(Elts, PFS) || - ParseToken(lltok::rbrace, "expected end of metadata node")) - return true; - - ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size()); - ID.Kind = ValID::t_MDNode; - return false; - } + if (Lex.getKind() == lltok::lbrace) + return ParseMetadataListValue(ID, PFS); // Standalone metadata reference // !42 @@ -2635,16 +2619,8 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = Constant::getNullValue(Ty); return false; case ValID::t_Constant: - if (ID.ConstantVal->getType() != Ty) { - // Allow a constant struct with a single member to be converted - // to a union, if the union has a member which is the same type - // as the struct member. - if (const UnionType* utype = dyn_cast<UnionType>(Ty)) { - return ParseUnionValue(utype, ID, V); - } - + if (ID.ConstantVal->getType() != Ty) return Error(ID.Loc, "constant expression type mismatch"); - } V = ID.ConstantVal; return false; @@ -2675,22 +2651,6 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } -bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) { - if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) { - if (stype->getNumContainedTypes() != 1) - return Error(ID.Loc, "constant expression type mismatch"); - int index = utype->getElementTypeIndex(stype->getContainedType(0)); - if (index < 0) - return Error(ID.Loc, "initializer type is not a member of the union"); - - V = ConstantUnion::get( - utype, cast<Constant>(ID.ConstantVal->getOperand(0))); - return false; - } - - return Error(ID.Loc, "constant expression type mismatch"); -} - /// FunctionHeader /// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs @@ -2724,6 +2684,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: case GlobalValue::LinkerPrivateWeakLinkage: + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: case GlobalValue::InternalLinkage: case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: @@ -2980,7 +2941,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { // With a normal result, we check to see if the instruction is followed by // a comma and metadata. if (EatIfPresent(lltok::comma)) - if (ParseInstructionMetadata(Inst)) + if (ParseInstructionMetadata(Inst, &PFS)) return true; break; case InstExtraComma: @@ -2988,7 +2949,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { // If the instruction parser ate an extra comma at the end of it, it // *must* be followed by metadata. - if (ParseInstructionMetadata(Inst)) + if (ParseInstructionMetadata(Inst, &PFS)) return true; break; } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index f765a2ae4e6ce..404cec3ed7c74 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -32,7 +32,6 @@ namespace llvm { class GlobalValue; class MDString; class MDNode; - class UnionType; /// ValID - Represents a reference of a definition of some sort with no type. /// There are several cases where we have to parse the value but where the @@ -80,6 +79,14 @@ namespace llvm { // Instruction metadata resolution. Each instruction can have a list of // MDRef info associated with them. 
+ // + // The simpler approach of just creating temporary MDNodes and then calling + // RAUW on them when the definition is processed doesn't work because some + // instruction metadata kinds, such as dbg, get stored in the IR in an + // "optimized" format which doesn't participate in the normal value use + // lists. This means that RAUW doesn't work, even on temporary MDNodes + // which otherwise support RAUW. Instead, we defer resolving MDNode + // references until the definitions have been processed. struct MDRef { SMLoc Loc; unsigned MDKind, MDSlot; @@ -180,7 +187,6 @@ namespace llvm { bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); bool ParseOptionalStackAlignment(unsigned &Alignment); - bool ParseInstructionMetadata(Instruction *Inst); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma); bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) { @@ -222,7 +228,6 @@ namespace llvm { } bool ParseTypeRec(PATypeHolder &H); bool ParseStructType(PATypeHolder &H, bool Packed); - bool ParseUnionType(PATypeHolder &H); bool ParseArrayVectorType(PATypeHolder &H, bool isVector); bool ParseFunctionType(PATypeHolder &Result); PATypeHolder HandleUpRefs(const Type *Ty); @@ -291,7 +296,6 @@ namespace llvm { return ParseTypeAndBasicBlock(BB, Loc, PFS); } - bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V); struct ParamInfo { LocTy Loc; @@ -308,8 +312,10 @@ namespace llvm { bool ParseGlobalValue(const Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); + bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS); + bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS); // Function Parsing. 
struct ArgInfo { diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 2703134ec1a96..61f93a4274981 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -37,7 +37,8 @@ namespace lltok { kw_declare, kw_define, kw_global, kw_constant, - kw_private, kw_linker_private, kw_linker_private_weak, kw_internal, + kw_private, kw_linker_private, kw_linker_private_weak, + kw_linker_private_weak_def_auto, kw_internal, kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, @@ -97,7 +98,6 @@ namespace lltok { kw_type, kw_opaque, - kw_union, kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index e511cbe29c756..e7cef9b5c3c58 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -45,8 +45,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr); if (F == 0) { Err = SMDiagnostic(Filename, - "Could not open input file '" + Filename + "': " + - ErrorStr); + "Could not open input file: " + ErrorStr); return 0; } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index b3f0776d29d54..830c79aa3b54e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -39,6 +39,7 @@ void BitcodeReader::FreeState() { std::vector<BasicBlock*>().swap(FunctionBBs); std::vector<Function*>().swap(FunctionsWithBodies); DeferredFunctionInfo.clear(); + MDKindMap.clear(); } //===----------------------------------------------------------------------===// @@ -76,6 +77,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; case 14: return GlobalValue::LinkerPrivateWeakLinkage; + case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage; } } @@ -295,8 +297,6 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) { NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(), UserCS->getType()->isPacked()); - } else if (ConstantUnion *UserCU = dyn_cast<ConstantUnion>(UserC)) { - NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]); } else if (isa<ConstantVector>(UserC)) { NewC = ConstantVector::get(&NewOps[0], NewOps.size()); } else { @@ -332,9 +332,9 @@ void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) { } // If there was a forward reference to this value, replace it. - Value *PrevVal = OldV; + MDNode *PrevVal = cast<MDNode>(OldV); OldV->replaceAllUsesWith(V); - delete PrevVal; + MDNode::deleteTemporary(PrevVal); // Deleting PrevVal sets Idx value in MDValuePtrs to null. Set new // value for Idx. MDValuePtrs[Idx] = V; @@ -350,7 +350,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { } // Create and return a placeholder, which will later be RAUW'd. 
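// The temporary-MDNode placeholder pattern used below (names illustrative):
//   MDNode *Placeholder = MDNode::getTemporary(Context, 0, 0);
//   ...
//   Placeholder->replaceAllUsesWith(RealNode); // once the node is parsed
//   MDNode::deleteTemporary(Placeholder);
// Temporary nodes are not uniqued, so, unlike ordinary MDNodes, they can
// safely be RAUW'd and destroyed.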
- Value *V = new Argument(Type::getMetadataTy(Context)); + Value *V = MDNode::getTemporary(Context, 0, 0); MDValuePtrs[Idx] = V; return V; } @@ -589,13 +589,6 @@ bool BitcodeReader::ParseTypeTable() { ResultTy = StructType::get(Context, EltTys, Record[0]); break; } - case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N] - SmallVector<const Type*, 8> EltTys; - for (unsigned i = 0, e = Record.size(); i != e; ++i) - EltTys.push_back(getTypeByID(Record[i], true)); - ResultTy = UnionType::get(&EltTys[0], EltTys.size()); - break; - } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); @@ -781,7 +774,8 @@ bool BitcodeReader::ParseMetadata() { bool IsFunctionLocal = false; // Read a record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + Code = Stream.ReadRecord(Code, Record); + switch (Code) { default: // Default behavior: ignore. break; case bitc::METADATA_NAME: { @@ -794,34 +788,46 @@ bool BitcodeReader::ParseMetadata() { Record.clear(); Code = Stream.ReadCode(); - // METADATA_NAME is always followed by METADATA_NAMED_NODE. - if (Stream.ReadRecord(Code, Record) != bitc::METADATA_NAMED_NODE) + // METADATA_NAME is always followed by METADATA_NAMED_NODE2. + // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0. + unsigned NextBitCode = Stream.ReadRecord(Code, Record); + if (NextBitCode == bitc::METADATA_NAMED_NODE) { + LLVM2_7MetadataDetected = true; + } else if (NextBitCode != bitc::METADATA_NAMED_NODE2) assert ( 0 && "Invalid Named Metadata record"); // Read named metadata elements. unsigned Size = Record.size(); - SmallVector<MDNode *, 8> Elts; + NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name); for (unsigned i = 0; i != Size; ++i) { - if (Record[i] == ~0U) { - Elts.push_back(NULL); - continue; - } MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i])); if (MD == 0) return Error("Malformed metadata record"); - Elts.push_back(MD); + NMD->addOperand(MD); } - Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(), - Elts.size(), TheModule); - MDValueList.AssignValue(V, NextMDValueNo++); + // Backwards compatibility hack: NamedMDValues used to be Values, + // and they got their own slots in the value numbering. They are no + // longer Values, however we still need to account for them in the + // numbering in order to be able to read old bitcode files. + // FIXME: Remove this in LLVM 3.0. + if (LLVM2_7MetadataDetected) + MDValueList.AssignValue(0, NextMDValueNo++); break; } - case bitc::METADATA_FN_NODE: + case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0. + case bitc::METADATA_FN_NODE2: IsFunctionLocal = true; // fall-through - case bitc::METADATA_NODE: { + case bitc::METADATA_NODE: // FIXME: Remove in LLVM 3.0. + case bitc::METADATA_NODE2: { + + // Detect 2.7-era metadata. + // FIXME: Remove in LLVM 3.0.
+ if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE) + LLVM2_7MetadataDetected = true; + if (Record.size() % 2 == 1) - return Error("Invalid METADATA_NODE record"); + return Error("Invalid METADATA_NODE2 record"); unsigned Size = Record.size(); SmallVector<Value*, 8> Elts; @@ -859,13 +865,12 @@ bool BitcodeReader::ParseMetadata() { SmallString<8> Name; Name.resize(RecordLength-1); unsigned Kind = Record[0]; - (void) Kind; for (unsigned i = 1; i != RecordLength; ++i) Name[i-1] = Record[i]; unsigned NewKind = TheModule->getMDKindID(Name.str()); - assert(Kind == NewKind && - "FIXME: Unable to handle custom metadata mismatch!");(void)NewKind; + if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) + return Error("Conflicting METADATA_KIND records"); break; } } @@ -1020,11 +1025,6 @@ bool BitcodeReader::ParseConstants() { Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); - } else if (const UnionType *UnTy = dyn_cast<UnionType>(CurTy)) { - uint64_t Index = Record[0]; - Constant *Val = ValueList.getConstantFwdRef(Record[1], - UnTy->getElementType(Index)); - V = ConstantUnion::get(UnTy, Val); } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) { const Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) @@ -1297,6 +1297,12 @@ bool BitcodeReader::ParseModule() { UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); } + // Look for global variables which need to be renamed. + for (Module::global_iterator + GI = TheModule->global_begin(), GE = TheModule->global_end(); + GI != GE; ++GI) + UpgradeGlobalVariable(GI); + // Force deallocation of memory for these vectors to favor the client that // want lazy deserialization. std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits); @@ -1614,15 +1620,22 @@ bool BitcodeReader::ParseMetadataAttachment() { switch (Stream.ReadRecord(Code, Record)) { default: // Default behavior: ignore. break; - case bitc::METADATA_ATTACHMENT: { + // FIXME: Remove in LLVM 3.0. + case bitc::METADATA_ATTACHMENT: + LLVM2_7MetadataDetected = true; + case bitc::METADATA_ATTACHMENT2: { unsigned RecordLength = Record.size(); if (Record.empty() || (RecordLength - 1) % 2 == 1) return Error ("Invalid METADATA_ATTACHMENT reader!"); Instruction *Inst = InstructionList[Record[0]]; for (unsigned i = 1; i != RecordLength; i = i+2) { unsigned Kind = Record[i]; + DenseMap<unsigned, unsigned>::iterator I = + MDKindMap.find(Kind); + if (I == MDKindMap.end()) + return Error("Invalid metadata kind ID"); Value *Node = MDValueList.getValueFwdRef(Record[i+1]); - Inst->setMetadata(Kind, cast<MDNode>(Node)); + Inst->setMetadata(I->second, cast<MDNode>(Node)); } break; } @@ -1638,6 +1651,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); + unsigned ModuleMDValueListSize = MDValueList.size(); // Add all the function arguments to the value table. for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) @@ -1722,7 +1736,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = 0; continue; - case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] + // FIXME: Remove this in LLVM 3.0. + case bitc::FUNC_CODE_DEBUG_LOC: + LLVM2_7MetadataDetected = true; + case bitc::FUNC_CODE_DEBUG_LOC2: { // DEBUG_LOC: [line, col, scope, ia] I = 0; // Get the last instruction emitted. 
if (CurBB && !CurBB->empty()) I = &CurBB->back(); @@ -1988,6 +2005,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } while(OpNum != Record.size()); const Type *ReturnType = F->getReturnType(); + // Handle multiple return values. FIXME: Remove in LLVM 3.0. if (Vs.size() > 1 || (ReturnType->isStructTy() && (Vs.empty() || Vs[0]->getType() != ReturnType))) { @@ -2183,7 +2201,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] // For backward compatibility, tolerate a lack of an opty, and use i32. - // LLVM 3.0: Remove this. + // Remove this in LLVM 3.0. if (Record.size() < 3 || Record.size() > 4) return Error("Invalid ALLOCA record"); unsigned OpNum = 0; @@ -2236,7 +2254,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_CALL: { + // FIXME: Remove this in LLVM 3.0. + case bitc::FUNC_CODE_INST_CALL: + LLVM2_7MetadataDetected = true; + case bitc::FUNC_CODE_INST_CALL2: { // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] if (Record.size() < 3) return Error("Invalid CALL record"); @@ -2324,7 +2345,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (A->getParent() == 0) { // We found at least one unresolved value. Nuke them all to avoid leaks. for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ - if ((A = dyn_cast<Argument>(ValueList.back())) && A->getParent() == 0) { + if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) { A->replaceAllUsesWith(UndefValue::get(A->getType())); delete A; } @@ -2333,6 +2354,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } } + // FIXME: Check for unresolved forward-declared metadata references + // and clean up leaks. + // See if anything took the address of blocks in this function. If so, // resolve them now. DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI = @@ -2352,8 +2376,21 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { BlockAddrFwdRefs.erase(BAFRI); } + // FIXME: Remove this in LLVM 3.0. + unsigned NewMDValueListSize = MDValueList.size(); + // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); + MDValueList.shrinkTo(ModuleMDValueListSize); + + // Backwards compatibility hack: Function-local metadata numbers + // were previously not reset between functions. This is now fixed, + // however we still need to understand the old numbering in order + // to be able to read old bitcode files. + // FIXME: Remove this in LLVM 3.0. + if (LLVM2_7MetadataDetected) + MDValueList.resize(NewMDValueListSize); + std::vector<BasicBlock*>().swap(FunctionBBs); return false; diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 55c71f7c886fe..053121bdad6e5 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -156,6 +156,9 @@ class BitcodeReader : public GVMaterializer { // stored here with their replacement function. typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap; UpgradedIntrinsicMap UpgradedIntrinsics; + + // Map the bitcode's custom MDKind ID to the Module's MDKind ID. + DenseMap<unsigned, unsigned> MDKindMap; // After the module header has been read, the FunctionsWithBodies list is // reversed. This keeps track of whether we've done this yet. @@ -170,11 +173,18 @@ class BitcodeReader : public GVMaterializer { /// are resolved lazily when functions are loaded. 
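/// For example (illustrative IR), an initializer such as:
///   @gv = global i8* blockaddress(@func, %bb)
/// may be parsed before the body of @func has been materialized; the
/// reference is recorded here and patched once @func's blocks exist.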
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy; DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs; + + /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or + /// earlier was detected, in which case we behave slightly differently, + /// for compatibility. + /// FIXME: Remove in LLVM 3.0. + bool LLVM2_7MetadataDetected; public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), - ErrorString(0), ValueList(C), MDValueList(C) { + ErrorString(0), ValueList(C), MDValueList(C), + LLVM2_7MetadataDetected(false) { HasReversedFunctionsWithBodies = false; } ~BitcodeReader() { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index fa1b2c4bee2b2..7b6fc6cd928df 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -181,14 +181,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); - // Abbrev for TYPE_CODE_UNION. - Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); - unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv); - // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -258,17 +250,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { AbbrevToUse = StructAbbrev; break; } - case Type::UnionTyID: { - const UnionType *UT = cast<UnionType>(T); - // UNION: [eltty x N] - Code = bitc::TYPE_CODE_UNION; - // Output all of the element types. 
- for (UnionType::element_iterator I = UT->element_begin(), - E = UT->element_end(); I != E; ++I) - TypeVals.push_back(VE.getTypeID(*I)); - AbbrevToUse = UnionAbbrev; - break; - } case Type::ArrayTyID: { const ArrayType *AT = cast<ArrayType>(T); // ARRAY: [numelts, eltty] @@ -299,21 +280,22 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { default: llvm_unreachable("Invalid linkage!"); - case GlobalValue::ExternalLinkage: return 0; - case GlobalValue::WeakAnyLinkage: return 1; - case GlobalValue::AppendingLinkage: return 2; - case GlobalValue::InternalLinkage: return 3; - case GlobalValue::LinkOnceAnyLinkage: return 4; - case GlobalValue::DLLImportLinkage: return 5; - case GlobalValue::DLLExportLinkage: return 6; - case GlobalValue::ExternalWeakLinkage: return 7; - case GlobalValue::CommonLinkage: return 8; - case GlobalValue::PrivateLinkage: return 9; - case GlobalValue::WeakODRLinkage: return 10; - case GlobalValue::LinkOnceODRLinkage: return 11; - case GlobalValue::AvailableExternallyLinkage: return 12; - case GlobalValue::LinkerPrivateLinkage: return 13; - case GlobalValue::LinkerPrivateWeakLinkage: return 14; + case GlobalValue::ExternalLinkage: return 0; + case GlobalValue::WeakAnyLinkage: return 1; + case GlobalValue::AppendingLinkage: return 2; + case GlobalValue::InternalLinkage: return 3; + case GlobalValue::LinkOnceAnyLinkage: return 4; + case GlobalValue::DLLImportLinkage: return 5; + case GlobalValue::DLLExportLinkage: return 6; + case GlobalValue::ExternalWeakLinkage: return 7; + case GlobalValue::CommonLinkage: return 8; + case GlobalValue::PrivateLinkage: return 9; + case GlobalValue::WeakODRLinkage: return 10; + case GlobalValue::LinkOnceODRLinkage: return 11; + case GlobalValue::AvailableExternallyLinkage: return 12; + case GlobalValue::LinkerPrivateLinkage: return 13; + case GlobalValue::LinkerPrivateWeakLinkage: return 14; + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15; } } @@ -503,13 +485,14 @@ static void WriteMDNode(const MDNode *N, Record.push_back(0); } } - unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE : - bitc::METADATA_NODE; + unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 : + bitc::METADATA_NODE2; Stream.EmitRecord(MDCode, Record, 0); Record.clear(); } -static void WriteModuleMetadata(const ValueEnumerator &VE, +static void WriteModuleMetadata(const Module *M, + const ValueEnumerator &VE, BitstreamWriter &Stream) { const ValueEnumerator::ValueList &Vals = VE.getMDValues(); bool StartedMetadataBlock = false; @@ -544,29 +527,30 @@ static void WriteModuleMetadata(const ValueEnumerator &VE, // Emit the finished record. Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev); Record.clear(); - } else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(Vals[i].first)) { - if (!StartedMetadataBlock) { - Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); - StartedMetadataBlock = true; - } - - // Write name. - StringRef Str = NMD->getName(); - for (unsigned i = 0, e = Str.size(); i != e; ++i) - Record.push_back(Str[i]); - Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/); - Record.clear(); + } + } - // Write named metadata operands. 
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - if (NMD->getOperand(i)) - Record.push_back(VE.getValueID(NMD->getOperand(i))); - else - Record.push_back(~0U); - } - Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); - Record.clear(); + // Write named metadata. + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode *NMD = I; + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + StartedMetadataBlock = true; } + + // Write name. + StringRef Str = NMD->getName(); + for (unsigned i = 0, e = Str.size(); i != e; ++i) + Record.push_back(Str[i]); + Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/); + Record.clear(); + + // Write named metadata operands. + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + Record.push_back(VE.getValueID(NMD->getOperand(i))); + Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0); + Record.clear(); } if (StartedMetadataBlock) @@ -601,7 +585,7 @@ static void WriteMetadataAttachment(const Function &F, SmallVector<uint64_t, 64> Record; // Write metadata attachments - // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] + // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]] SmallVector<std::pair<unsigned, MDNode*>, 4> MDs; for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -619,7 +603,7 @@ static void WriteMetadataAttachment(const Function &F, Record.push_back(MDs[i].first); Record.push_back(VE.getValueID(MDs[i].second)); } - Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); + Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0); Record.clear(); } @@ -634,12 +618,11 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { SmallVector<StringRef, 4> Names; M->getMDKindNames(Names); - assert(Names[0] == "" && "MDKind #0 is invalid"); - if (Names.size() == 1) return; + if (Names.empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); - for (unsigned MDKindID = 1, e = Names.size(); MDKindID != e; ++MDKindID) { + for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); StringRef KName = Names[MDKindID]; Record.append(KName.begin(), KName.end()); @@ -734,8 +717,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Code = bitc::CST_CODE_UNDEF; } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) { if (IV->getBitWidth() <= 64) { - int64_t V = IV->getSExtValue(); - if (V >= 0) + uint64_t V = IV->getSExtValue(); + if ((int64_t)V >= 0) Record.push_back(V << 1); else Record.push_back((-V << 1) | 1); @@ -809,20 +792,6 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); AbbrevToUse = AggregateAbbrev; - } else if (isa<ConstantUnion>(C)) { - Code = bitc::CST_CODE_AGGREGATE; - - // Unions only have one entry but we must send type along with it. 
- const Type *EntryKind = C->getOperand(0)->getType(); - - const UnionType *UnTy = cast<UnionType>(C->getType()); - int UnionIndex = UnTy->getElementTypeIndex(EntryKind); - assert(UnionIndex != -1 && "Constant union contains invalid entry"); - - Record.push_back(UnionIndex); - Record.push_back(VE.getValueID(C->getOperand(0))); - - AbbrevToUse = AggregateAbbrev; } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { switch (CE->getOpcode()) { default: @@ -902,6 +871,9 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Record.push_back(VE.getValueID(BA->getFunction())); Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock())); } else { +#ifndef NDEBUG + C->dump(); +#endif llvm_unreachable("Unknown constant!"); } Stream.EmitRecord(Code, Record, AbbrevToUse); @@ -1139,7 +1111,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - Code = bitc::FUNC_CODE_INST_CALL; + Code = bitc::FUNC_CODE_INST_CALL2; Vals.push_back(VE.getAttributeID(CI.getAttributes())); Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall())); @@ -1283,7 +1255,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, Vals.push_back(DL.getCol()); Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0); Vals.push_back(IA ? VE.getValueID(IA)+1 : 0); - Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals); Vals.clear(); LastDL = DL; @@ -1532,7 +1504,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { WriteModuleConstants(VE, Stream); // Emit metadata. - WriteModuleMetadata(VE, Stream); + WriteModuleMetadata(M, VE, Stream); // Emit function bodies. for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 3a0d3ce0be994..91e115cba6cc4 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -21,7 +21,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit WriteBitcodePass(raw_ostream &o) - : ModulePass(&ID), OS(o) {} + : ModulePass(ID), OS(o) {} const char *getPassName() const { return "Bitcode Writer"; } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 7fa425a7d871f..2f02262c36aff 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -75,7 +75,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... 
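The BitcodeWriterPass hunk above reflects an LLVM-wide change where pass constructors take their static ID char by reference rather than by address; a pass's identity is still the address of that char. A standalone sketch of the idiom under that assumption (PassBase and WriteBitcodePassLike are stand-ins, not LLVM's classes):

#include <cstdio>

struct PassBase {
  const void *PassID;
  explicit PassBase(const char &ID) : PassID(&ID) {} // new ModulePass(ID) style
};

struct WriteBitcodePassLike : PassBase {
  static char ID; // only the address matters, never the value
  WriteBitcodePassLike() : PassBase(ID) {}
};
char WriteBitcodePassLike::ID = 0;

int main() {
  WriteBitcodePassLike A, B;
  std::printf("same identity: %d\n", A.PassID == B.PassID); // prints 1
}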
EnumerateValueSymbolTable(M->getValueSymbolTable()); - EnumerateMDSymbolTable(M->getMDSymbolTable()); + EnumerateNamedMetadata(M); SmallVector<std::pair<unsigned, MDNode*>, 8> MDs; @@ -137,7 +137,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { InstructionMapType::const_iterator I = InstructionMap.find(Inst); assert (I != InstructionMap.end() && "Instruction is not mapped!"); - return I->second; + return I->second; } void ValueEnumerator::setInstructionID(const Instruction *I) { @@ -207,35 +207,48 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { EnumerateValue(VI->getValue()); } -/// EnumerateMDSymbolTable - Insert all of the values in the specified metadata -/// table. -void ValueEnumerator::EnumerateMDSymbolTable(const MDSymbolTable &MST) { - for (MDSymbolTable::const_iterator MI = MST.begin(), ME = MST.end(); - MI != ME; ++MI) - EnumerateValue(MI->getValue()); +/// EnumerateNamedMetadata - Insert all of the values referenced by +/// named metadata in the specified module. +void ValueEnumerator::EnumerateNamedMetadata(const Module *M) { + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) + EnumerateNamedMDNode(I); } void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { - // Check to see if it's already in! - unsigned &MDValueID = MDValueMap[MD]; - if (MDValueID) { - // Increment use count. - MDValues[MDValueID-1].second++; - return; + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) + EnumerateMetadata(MD->getOperand(i)); +} + +/// EnumerateMDNodeOperands - Enumerate all non-function-local values +/// and types referenced by the given MDNode. +void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (Value *V = N->getOperand(i)) { + if (isa<MDNode>(V) || isa<MDString>(V)) + EnumerateMetadata(V); + else if (!isa<Instruction>(V) && !isa<Argument>(V)) + EnumerateValue(V); + } else + EnumerateType(Type::getVoidTy(N->getContext())); } +} + +void ValueEnumerator::EnumerateMetadata(const Value *MD) { + assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind"); // Enumerate the type of this value. EnumerateType(MD->getType()); - for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) - if (MDNode *E = MD->getOperand(i)) - EnumerateValue(E); - MDValues.push_back(std::make_pair(MD, 1U)); - MDValueMap[MD] = Values.size(); -} + const MDNode *N = dyn_cast<MDNode>(MD); + + // In the module-level pass, skip function-local nodes themselves, but + // do walk their operands. + if (N && N->isFunctionLocal() && N->getFunction()) { + EnumerateMDNodeOperands(N); + return; + } -void ValueEnumerator::EnumerateMetadata(const Value *MD) { - assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind"); // Check to see if it's already in! unsigned &MDValueID = MDValueMap[MD]; if (MDValueID) { @@ -243,37 +256,52 @@ void ValueEnumerator::EnumerateMetadata(const Value *MD) { MDValues[MDValueID-1].second++; return; } + MDValues.push_back(std::make_pair(MD, 1U)); + MDValueID = MDValues.size(); + + // Enumerate all non-function-local operands. + if (N) + EnumerateMDNodeOperands(N); +} + +/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata +/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) { + assert(N->isFunctionLocal() && N->getFunction() && + "EnumerateFunctionLocalMetadata called on non-function-local mdnode!"); // Enumerate the type of this value. - EnumerateType(MD->getType()); + EnumerateType(N->getType()); - if (const MDNode *N = dyn_cast<MDNode>(MD)) { - MDValues.push_back(std::make_pair(MD, 1U)); - MDValueMap[MD] = MDValues.size(); - MDValueID = MDValues.size(); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (Value *V = N->getOperand(i)) - EnumerateValue(V); - else - EnumerateType(Type::getVoidTy(MD->getContext())); - } - if (N->isFunctionLocal() && N->getFunction()) - FunctionLocalMDs.push_back(N) + // Check to see if it's already in! + unsigned &MDValueID = MDValueMap[N]; + if (MDValueID) { + // Increment use count. + MDValues[MDValueID-1].second++; return; } - - // Add the value. - assert(isa<MDString>(MD) && "Unknown metadata kind"); - MDValues.push_back(std::make_pair(MD, 1U)); + MDValues.push_back(std::make_pair(N, 1U)); MDValueID = MDValues.size(); + + // To incorporate function-local information visit all function-local + // MDNodes and all function-local values they reference. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (Value *V = N->getOperand(i)) { + if (MDNode *O = dyn_cast<MDNode>(V)) { + if (O->isFunctionLocal() && O->getFunction()) + EnumerateFunctionLocalMetadata(O); + } else if (isa<Instruction>(V) || isa<Argument>(V)) + EnumerateValue(V); + } + + // Also, collect all function-local MDNodes for easy access. + FunctionLocalMDs.push_back(N); } void ValueEnumerator::EnumerateValue(const Value *V) { assert(!V->getType()->isVoidTy() && "Can't insert void values!"); - if (isa<MDNode>(V) || isa<MDString>(V)) - return EnumerateMetadata(V); - else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V)) - return EnumerateNamedMDNode(NMD); + assert(!isa<MDNode>(V) && !isa<MDString>(V) && + "EnumerateValue doesn't handle Metadata!"); // Check to see if it's already in! unsigned &ValueID = ValueMap[V]; @@ -359,7 +387,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { // blockaddress. if (isa<BasicBlock>(Op)) continue; - EnumerateOperandType(cast<Constant>(Op)); + EnumerateOperandType(Op); } if (const MDNode *N = dyn_cast<MDNode>(V)) { @@ -368,7 +396,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { EnumerateOperandType(Elem); } } else if (isa<MDString>(V) || isa<MDNode>(V)) - EnumerateValue(V); + EnumerateMetadata(V); } void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { @@ -386,10 +414,11 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { void ValueEnumerator::incorporateFunction(const Function &F) { InstructionCount = 0; NumModuleValues = Values.size(); + NumModuleMDValues = MDValues.size(); // Adding function arguments to the value table. - for(Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I) + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I) EnumerateValue(I); FirstFuncConstantID = Values.size(); @@ -416,7 +445,6 @@ void ValueEnumerator::incorporateFunction(const Function &F) { FirstInstID = Values.size(); - FunctionLocalMDs.clear(); SmallVector<MDNode *, 8> FnLocalMDVector; // Add all of the instructions.
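The enumeration strategy above is two-phase: at module level a function-local MDNode is never given a number itself, but its operands are still walked so that any module-level values it references get enumerated; the node itself is numbered later, per function. A simplified standalone model of the module-level pass, with Node standing in for MDNode (an assumption, not LLVM's type):

#include <map>
#include <vector>

struct Node {
  bool FunctionLocal = false;
  std::vector<const Node*> Ops;
};

struct Enumerator {
  std::map<const Node*, unsigned> IDs;   // 1-based, like MDValueMap
  std::vector<const Node*> Order;        // like MDValues

  void enumerateOperands(const Node *N) {
    for (const Node *Op : N->Ops)
      if (Op) enumerate(Op);
  }

  void enumerate(const Node *N) {
    if (N->FunctionLocal) {  // module pass: walk operands, skip the node
      enumerateOperands(N);
      return;
    }
    unsigned &ID = IDs[N];
    if (ID) return;          // already numbered; real code bumps a use count
    Order.push_back(N);
    ID = Order.size();
    enumerateOperands(N);
  }
};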
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -428,6 +456,15 @@ void ValueEnumerator::incorporateFunction(const Function &F) { // Enumerate metadata after the instructions they might refer to. FnLocalMDVector.push_back(MD); } + + SmallVector<std::pair<unsigned, MDNode*>, 8> MDs; + I->getAllMetadataOtherThanDebugLoc(MDs); + for (unsigned i = 0, e = MDs.size(); i != e; ++i) { + MDNode *N = MDs[i].second; + if (N->isFunctionLocal() && N->getFunction()) + FnLocalMDVector.push_back(N); + } + if (!I->getType()->isVoidTy()) EnumerateValue(I); } @@ -435,18 +472,22 @@ void ValueEnumerator::incorporateFunction(const Function &F) { // Add all of the function-local metadata. for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) - EnumerateOperandType(FnLocalMDVector[i]); + EnumerateFunctionLocalMetadata(FnLocalMDVector[i]); } void ValueEnumerator::purgeFunction() { /// Remove purged values from the ValueMap. for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) ValueMap.erase(Values[i].first); + for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i) + MDValueMap.erase(MDValues[i].first); for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) ValueMap.erase(BasicBlocks[i]); Values.resize(NumModuleValues); + MDValues.resize(NumModuleMDValues); BasicBlocks.clear(); + FunctionLocalMDs.clear(); } static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 2b9b15fa5a77a..cd1d2371b701a 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -72,6 +72,11 @@ private: /// When a function is incorporated, this is the size of the Values list /// before incorporation. unsigned NumModuleValues; + + /// When a function is incorporated, this is the size of the MDValues list + /// before incorporation. + unsigned NumModuleMDValues; + unsigned FirstFuncConstantID; unsigned FirstInstID; @@ -132,7 +137,9 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Value *MD); + void EnumerateFunctionLocalMetadata(const MDNode *N); void EnumerateNamedMDNode(const NamedMDNode *NMD); void EnumerateValue(const Value *V); void EnumerateType(const Type *T); @@ -141,7 +148,7 @@ private: void EnumerateTypeSymbolTable(const TypeSymbolTable &ST); void EnumerateValueSymbolTable(const ValueSymbolTable &ST); - void EnumerateMDSymbolTable(const MDSymbolTable &ST); + void EnumerateNamedMetadata(const Module *M); }; } // End llvm namespace diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index a7189acc3fecd..5a634d6ccb018 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -41,8 +41,11 @@ DebugMod("agg-antidep-debugmod", AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB) : - NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) { - + NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), + GroupNodeIndices(TargetRegs, 0), + KillIndices(TargetRegs, 0), + DefIndices(TargetRegs, 0) +{ const unsigned BBSize = BB->size(); for (unsigned i = 0; i < NumTargetRegs; ++i) { // Initialize all registers to be in their own group. 
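purgeFunction above now applies to MDValues the same watermark discipline it already applied to Values: incorporateFunction records the module-level size, and purging erases the map entries past that mark before truncating the list. A standalone sketch of the pattern, with std::string standing in for values:

#include <cstddef>
#include <map>
#include <string>
#include <vector>

struct ScopedTable {
  std::vector<std::string> Values;
  std::map<std::string, unsigned> IDs;
  std::size_t Watermark = 0;

  void incorporate() { Watermark = Values.size(); } // like NumModuleMDValues

  void add(const std::string &V) {
    Values.push_back(V);
    IDs[V] = Values.size();
  }

  void purge() {
    for (std::size_t i = Watermark, e = Values.size(); i != e; ++i)
      IDs.erase(Values[i]);       // drop per-function map entries first...
    Values.resize(Watermark);     // ...then truncate back to module level
  }
};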
Initially we @@ -54,8 +57,7 @@ AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, } } -unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) -{ +unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) { unsigned Node = GroupNodeIndices[Reg]; while (GroupNodes[Node] != Node) Node = GroupNodes[Node]; @@ -145,8 +147,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); - unsigned *KillIndices = State->GetKillIndices(); - unsigned *DefIndices = State->GetDefIndices(); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); // Determine the live-out physregs for this block. if (IsReturnBlock) { @@ -226,7 +228,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, DEBUG(MI->dump()); DEBUG(dbgs() << "\tRegs:"); - unsigned *DefIndices = State->GetDefIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { // If Reg is current live, then mark that it can't be renamed as // we don't know the extent of its live-range anymore (now that it @@ -328,8 +330,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag, const char *header, const char *footer) { - unsigned *KillIndices = State->GetKillIndices(); - unsigned *DefIndices = State->GetDefIndices(); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -364,7 +366,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count, std::set<unsigned>& PassthruRegs) { - unsigned *DefIndices = State->GetDefIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -560,8 +562,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned AntiDepGroupIndex, RenameOrderType& RenameOrder, std::map<unsigned, unsigned> &RenameMap) { - unsigned *KillIndices = State->GetKillIndices(); - unsigned *DefIndices = State->GetDefIndices(); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -652,6 +654,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( if (R == RB) R = RE; --R; const unsigned NewSuperReg = *R; + // Don't consider non-allocatable registers + if (!AllocatableSet.test(NewSuperReg)) continue; // Don't replace a register with itself. 
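GetGroup above is the find half of a union-find structure: follow parent links in GroupNodes until reaching a root (a node that is its own parent). A standalone sketch, leaving out the GroupNodeIndices indirection and, like the original, not path-compressing:

#include <vector>

// Parent[i] == i marks a group root; any other value is a parent link.
unsigned findGroup(const std::vector<unsigned> &Parent, unsigned Node) {
  while (Parent[Node] != Node)
    Node = Parent[Node];
  return Node;
}

// Merging two groups is then just re-parenting one root onto the other:
void unionGroups(std::vector<unsigned> &Parent, unsigned A, unsigned B) {
  Parent[findGroup(Parent, A)] = findGroup(Parent, B);
}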
if (NewSuperReg == SuperReg) continue; @@ -733,8 +737,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex) { - unsigned *KillIndices = State->GetKillIndices(); - unsigned *DefIndices = State->GetDefIndices(); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 91ebb850d19db..9d715ccf79f8d 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -59,27 +59,27 @@ namespace llvm { /// currently representing the group that the register belongs to. /// Register 0 is always represented by the 0 group, a group /// composed of registers that are not eligible for anti-aliasing. - unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> GroupNodeIndices; /// RegRefs - Map registers to all their references within a live range. std::multimap<unsigned, RegisterReference> RegRefs; /// KillIndices - The index of the most recent kill (proceding bottom-up), /// or ~0u if the register is not live. - unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> KillIndices; /// DefIndices - The index of the most recent complete def (proceding bottom /// up), or ~0u if the register is live. - unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> DefIndices; public: AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); /// GetKillIndices - Return the kill indices. - unsigned *GetKillIndices() { return KillIndices; } + std::vector<unsigned> &GetKillIndices() { return KillIndices; } /// GetDefIndices - Return the define indices. - unsigned *GetDefIndices() { return DefIndices; } + std::vector<unsigned> &GetDefIndices() { return DefIndices; } /// GetRegRefs - Return the RegRefs map. 
std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index f71eee5d01b89..e3dd646c952ed 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -109,7 +109,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) { V = V->stripPointerCasts(); GlobalVariable *GV = dyn_cast<GlobalVariable>(V); - if (GV && GV->getName() == ".llvm.eh.catch.all.value") { + if (GV && GV->getName() == "llvm.eh.catch.all.value") { assert(GV->hasInitializer() && "The EH catch-all value must have an initializer"); Value *Init = GV->getInitializer(); @@ -171,7 +171,7 @@ ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { FOC = FPC = ISD::SETFALSE; break; } - if (FiniteOnlyFPMath()) + if (NoNaNsFPMath) return FOC; else return FPC; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index db1b37ab263fb..d358ab20ffc53 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -91,7 +91,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) - : MachineFunctionPass(&ID), + : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), @@ -200,11 +200,17 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // .weak_definition _foo - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + + if ((GlobalValue::LinkageTypes)Linkage != + GlobalValue::LinkerPrivateWeakDefAutoLinkage) + // .weak_definition _foo + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + else + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); } else if (MAI->getLinkOnceDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); @@ -510,12 +516,8 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { } // Check for spill-induced copies - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, - SrcSubIdx, DstSubIdx)) { - if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) - CommentOS << " Reload Reuse\n"; - } + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; } /// EmitImplicitDef - This method emits the specified machine instruction @@ -603,12 +605,15 @@ void AsmPrinter::EmitFunctionBody() { // Print out code for the function. bool HasAnyRealCode = false; + const MachineInstr *LastMI = 0; for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // Print a label for the basic block. EmitBasicBlockStart(I); for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { + LastMI = II; + // Print the assembly for the instruction. 
if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() && !II->isDebugValue()) { @@ -625,7 +630,7 @@ void AsmPrinter::EmitFunctionBody() { EmitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); @@ -656,11 +661,18 @@ void AsmPrinter::EmitFunctionBody() { } } } - + + // If the last instruction was a prolog label, then we have a situation where + // we emitted a prolog but no function body. This results in the ending prolog + // label equaling the end of function label and an invalid "row" in the + // FDE. We need to emit a noop in this situation so that the FDE's rows are + // valid. + bool RequiresNoop = LastMI && LastMI->isPrologLabel(); + // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the // labels from collapsing together. Just emit a noop. - if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) { + if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) { MCInst Noop; TM.getInstrInfo()->getNoopForMachoTarget(Noop); if (Noop.getOpcode()) { @@ -1206,6 +1218,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); } } + +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// where the size in bytes of the directive is specified by Size and Label +/// specifies the label. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, + unsigned Size) + const { + + // Emit Label+Offset + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), + MCConstantExpr::Create(Offset, OutContext), + OutContext); + + OutStreamer.EmitValue(Plus, Size, 0/*AddrSpace*/); +} //===----------------------------------------------------------------------===// @@ -1244,6 +1272,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); @@ -1262,10 +1291,17 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) if (C != CE) return LowerConstant(C, AP); -#ifndef NDEBUG - CE->dump(); -#endif - llvm_unreachable("FIXME: Don't support this constant expr"); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/false, + !AP.MF ?
0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } + return MCConstantExpr::Create(0, Ctx); case Instruction::GetElementPtr: { const TargetData &TD = *AP.TM.getTargetData(); // Generate a symbolic expression for the byte address @@ -1413,21 +1449,6 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, "Layout of constant struct may be incorrect!"); } -static void EmitGlobalConstantUnion(const ConstantUnion *CU, - unsigned AddrSpace, AsmPrinter &AP) { - const TargetData *TD = AP.TM.getTargetData(); - unsigned Size = TD->getTypeAllocSize(CU->getType()); - - const Constant *Contents = CU->getOperand(0); - unsigned FilledSize = TD->getTypeAllocSize(Contents->getType()); - - // Print the actually filled part - EmitGlobalConstantImpl(Contents, AddrSpace, AP); - - // And pad with enough zeroes - AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace); -} - static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { // FP Constants are printed as integer constants to avoid losing @@ -1530,7 +1551,7 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, case 8: if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: EmitGlobalConstantLargeInt(CI, AddrSpace, AP); @@ -1553,9 +1574,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return; } - if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV)) - return EmitGlobalConstantUnion(CVU, AddrSpace, AP); - if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index b310578584bc9..ce4519c541e3b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -36,7 +36,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - if (MAI->hasLEB128()) { + if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) { // FIXME: MCize. OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value)); return; @@ -61,7 +61,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - if (MAI->hasLEB128() && PadTo == 0) { + if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) { // FIXME: MCize. OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value)); return; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 202d9b67fd157..df0316814c08b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -22,7 +22,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCParser/AsmParser.h" #include "llvm/Target/TargetAsmParser.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegistry.h" @@ -72,16 +71,18 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. 
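EmitULEB128 above now takes the textual .uleb128 shortcut only when the streamer supports raw text; otherwise the value is emitted byte by byte. A standalone sketch of that fallback encoding (7 payload bits per byte, high bit set on every byte but the last):

#include <cstdint>
#include <vector>

std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

// Example: encodeULEB128(624485) yields { 0xE5, 0x8E, 0x26 },
// the classic worked example from the DWARF specification.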
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI); - OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser)); + OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr, + OutContext, OutStreamer, + *MAI)); + OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); - Parser.setTargetParser(*TAP.get()); + Parser->setTargetParser(*TAP.get()); // Don't implicitly switch to the text section before the asm. - int Res = Parser.Run(/*NoInitialTextSection*/ true, - /*NoFinalize*/ true); + int Res = Parser->Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 65c1d190216fa..c886a5ecc6153 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -44,7 +44,7 @@ using namespace llvm; static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, cl::desc("Print DbgScope information for each machine instruction")); -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", +static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -116,8 +116,8 @@ public: /// addGlobalType - Add a new global type to the compile unit. /// - void addGlobalType(StringRef Name, DIE *Die) { - GlobalTypes[Name] = Die; + void addGlobalType(StringRef Name, DIE *Die) { + GlobalTypes[Name] = Die; } /// getDIE - Returns the debug information entry map slot for the @@ -131,8 +131,9 @@ public: /// getDIEEntry - Returns the debug information entry for the speciefied /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N); + DIEEntry *getDIEEntry(const MDNode *N) { + DenseMap<const MDNode *, DIEEntry *>::iterator I = + MDNodeToDIEEntryMap.find(N); if (I == MDNodeToDIEEntryMap.end()) return NULL; return I->second; @@ -179,6 +180,73 @@ public: DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + unsigned getTag() const { return Var.getTag(); } + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. 
This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. */ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); + } + return Ty; + } + return Ty; + } }; //===----------------------------------------------------------------------===// @@ -194,7 +262,7 @@ class DbgScope { DbgScope *Parent; // Parent to this scope. DIDescriptor Desc; // Debug info descriptor for scope. // Location at which this scope is inlined. - AssertingVH<const MDNode> InlinedAtLocation; + AssertingVH<const MDNode> InlinedAtLocation; bool AbstractScope; // Abstract Scope const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. @@ -220,19 +288,19 @@ public: const MDNode *getInlinedAt() const { return InlinedAtLocation; } const MDNode *getScopeNode() const { return Desc; } const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } - const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } + const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; } const SmallVector<DbgRange, 4> &getRanges() { return Ranges; } /// openInsnRange - This scope covers instruction range starting from MI. void openInsnRange(const MachineInstr *MI) { - if (!FirstInsn) + if (!FirstInsn) FirstInsn = MI; - + if (Parent) Parent->openInsnRange(MI); } - /// extendInsnRange - Extend the current instruction range covered by + /// extendInsnRange - Extend the current instruction range covered by /// this scope. void extendInsnRange(const MachineInstr *MI) { assert (FirstInsn && "MI Range is not open!"); @@ -247,9 +315,9 @@ public: void closeInsnRange(DbgScope *NewScope = NULL) { assert (LastInsn && "Last insn missing!"); Ranges.push_back(DbgRange(FirstInsn, LastInsn)); - FirstInsn = NULL; + FirstInsn = NULL; LastInsn = NULL; - // If Parent dominates NewScope then do not close Parent's instruction + // If Parent dominates NewScope then do not close Parent's instruction // range. 
if (Parent && (!NewScope || !Parent->dominates(NewScope))) Parent->closeInsnRange(NewScope); @@ -264,7 +332,7 @@ public: unsigned getDFSIn() const { return DFSIn; } void setDFSIn(unsigned I) { DFSIn = I; } bool dominates(const DbgScope *S) { - if (S == this) + if (S == this) return true; if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut()) return true; @@ -313,14 +381,13 @@ DbgScope::~DbgScope() { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), - AbbreviationsSet(InitAbbreviationsSetSize), + AbbreviationsSet(InitAbbreviationsSetSize), CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; - + DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; - DwarfDebugLineSectionSym = CurrentLineSectionSym = 0; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); { @@ -377,7 +444,7 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? + DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, Form, Value); } @@ -392,7 +459,7 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, } /// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. +/// keeps string reference. void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, StringRef String) { DIEValue *Value = new (DIEValueAllocator) DIEString(String); @@ -434,14 +501,14 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { +void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. - if (!V->Verify()) + if (!V.Verify()) return; - unsigned Line = V->getLineNumber(); - unsigned FileID = GetOrCreateSourceID(V->getContext().getDirectory(), - V->getContext().getFilename()); + unsigned Line = V.getLineNumber(); + unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(), + V.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -449,14 +516,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { /// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) { +void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { // Verify global variable. - if (!G->Verify()) + if (!G.Verify()) return; - unsigned Line = G->getLineNumber(); - unsigned FileID = GetOrCreateSourceID(G->getContext().getDirectory(), - G->getContext().getFilename()); + unsigned Line = G.getLineNumber(); + unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(), + G.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -464,19 +531,19 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) { /// addSourceLine - Add location information to specified debug information /// entry. 
-void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { +void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. - if (!SP->Verify()) + if (!SP.Verify()) return; // If the line number is 0, don't add it. - if (SP->getLineNumber() == 0) + if (SP.getLineNumber() == 0) return; - unsigned Line = SP->getLineNumber(); - if (!SP->getContext().Verify()) + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(SP->getDirectory(), - SP->getFilename()); + unsigned FileID = GetOrCreateSourceID(SP.getDirectory(), + SP.getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -484,16 +551,16 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { /// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { +void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { // Verify type. - if (!Ty->Verify()) + if (!Ty.Verify()) return; - unsigned Line = Ty->getLineNumber(); - if (!Ty->getContext().Verify()) + unsigned Line = Ty.getLineNumber(); + if (!Ty.getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(Ty->getContext().getDirectory(), - Ty->getContext().getFilename()); + unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(), + Ty.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -501,14 +568,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { /// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) { +void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { // Verify namespace. - if (!NS->Verify()) + if (!NS.Verify()) return; - unsigned Line = NS->getLineNumber(); - StringRef FN = NS->getFilename(); - StringRef Dir = NS->getDirectory(); + unsigned Line = NS.getLineNumber(); + StringRef FN = NS.getFilename(); + StringRef Dir = NS.getDirectory(); unsigned FileID = GetOrCreateSourceID(Dir, FN); assert(FileID && "Invalid file id"); @@ -516,55 +583,21 @@ void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) { addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } -/* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. 
- - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - -/// Find the type the programmer originally declared the variable to be -/// and return that type. -/// -DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) { - - DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); - - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - if (Name == DT.getName()) - return (DT.getTypeDerivedFrom()); - } +/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based +/// on provided frame index. +void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { + MachineLocation Location; + unsigned FrameReg; + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + Location.set(FrameReg, Offset); - return Ty; + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); } /// addComplexAddress - Start with the address based on the location provided, @@ -575,8 +608,7 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) { void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { - const DIVariable &VD = DV->getVariable(); - DIType Ty = VD.getType(); + DIType Ty = DV->getType(); // Decode the original location, and use that as the start of the byref // variable's location. 
@@ -603,12 +635,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); } - for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) { - uint64_t Element = VD.getAddrElement(i); + for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); if (Element == DIFactory::OpPlus) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i)); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); } else if (Element == DIFactory::OpDeref) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } else llvm_unreachable("unknown DIFactory Opcode"); @@ -681,13 +713,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { - const DIVariable &VD = DV->getVariable(); - DIType Ty = VD.getType(); + DIType Ty = DV->getType(); DIType TmpTy = Ty; unsigned Tag = Ty.getTag(); bool isPointer = false; - StringRef varName = VD.getName(); + StringRef varName = DV->getName(); if (Tag == dwarf::DW_TAG_pointer_type) { DIDerivedType DTy = DIDerivedType(Ty); @@ -835,26 +866,26 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); - + // Get the raw data form of the floating point. const APInt FltVal = FPImm.bitcastToAPInt(); const char *FltPtr = (const char*)FltVal.getRawData(); - + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getTargetData().isLittleEndian(); int Incr = (LittleEndian ? 1 : -1); int Start = (LittleEndian ? 0 : NumBytes - 1); int Stop = (LittleEndian ? NumBytes : -1); - + // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) addUInt(Block, 0, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); - + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); if (VS) addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); - return true; + return true; } @@ -872,7 +903,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); - else + else getCompileUnit(Context)->addDie(Die); } @@ -965,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, &DTy); + addSourceLine(&Buffer, DTy); } /// constructTypeDIE - Construct type DIE from DICompositeType. 
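addConstantFPValue above emits the raw words of an APFloat one DW_FORM_data1 byte at a time, walking forward on little-endian targets and backward on big-endian ones. A standalone sketch of that Incr/Start/Stop walk:

#include <cstdint>
#include <cstdio>

void emitBytes(const uint8_t *Data, int NumBytes, bool LittleEndian) {
  int Incr  = LittleEndian ? 1 : -1;
  int Start = LittleEndian ? 0 : NumBytes - 1;
  int Stop  = LittleEndian ? NumBytes : -1;
  for (; Start != Stop; Start += Incr)
    std::printf("0x%02x ", Data[Start]); // stands in for the data1 form
  std::printf("\n");
}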
@@ -1039,7 +1070,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addType(ElemDie, DV.getType()); addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(ElemDie, &DV); + addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) ElemDie = createMemberDIE(DIDerivedType(Element)); else @@ -1057,7 +1088,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, getOrCreateTypeDIE(DIType(ContainingType))); else { DIDescriptor Context = CTy.getContext(); @@ -1073,7 +1104,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) @@ -1089,7 +1120,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add source line info if available. if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, &CTy); + addSourceLine(&Buffer, CTy); } } @@ -1149,7 +1180,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { return Enumerator; } -/// getRealLinkageName - If special LLVM prefix that is used to inform the asm +/// getRealLinkageName - If special LLVM prefix that is used to inform the asm /// printer to not emit usual symbol prefix before the symbol name is used then /// return linkage name after skipping this special LLVM prefix. static StringRef getRealLinkageName(StringRef LinkageName) { @@ -1159,40 +1190,16 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createGlobalVariableDIE - Create new DIE using GV. -DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) { - // If the global variable was optmized out then no need to create debug info - // entry. - if (!GV.getGlobal()) return NULL; - if (GV.getDisplayName().empty()) return NULL; - - DIE *GVDie = new DIE(dwarf::DW_TAG_variable); - addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); - - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) - addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); - - addType(GVDie, GV.getType()); - if (!GV.isLocalToUnit()) - addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(GVDie, &GV); - - return GVDie; -} - /// createMemberDIE - Create new member DIE. 
-DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { +DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - + addType(MemberDie, DT.getTypeDerivedFrom()); - addSourceLine(MemberDie, &DT); + addSourceLine(MemberDie, DT); DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); @@ -1240,7 +1247,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, + addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, VBaseLocationDie); } else addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); @@ -1261,7 +1268,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { } /// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { +DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { CompileUnit *SPCU = getCompileUnit(SP); DIE *SPDie = SPCU->getDIE(SP); if (SPDie) @@ -1277,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, getRealLinkageName(LinkageName)); - addSourceLine(SPDie, &SP); + addSourceLine(SPDie, SP); // Add prototyped tag, if C or ObjC. unsigned Lang = SP.getCompileUnit().getLanguage(); @@ -1302,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); - ContainingTypeMap.insert(std::make_pair(SPDie, + ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } @@ -1331,10 +1338,14 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { if (!SP.isLocalToUnit()) addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - + if (SP.isOptimized()) addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + if (unsigned isa = Asm->getISAEncoding()) { + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + } + // DW_TAG_inlined_subroutine may refer to this DIE. SPCU->insertDIE(SP, SPDie); @@ -1394,18 +1405,18 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - + // There is not any need to generate specification DIE for a function // defined at compile unit level. If a function is defined inside another // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating + // expect specification DIE in parent function. So avoid creating // specification DIE for a function defined inside a function. if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && + !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. + + // Add arguments. 
DICompositeType SPTy = SP.getType(); DIArray Args = SPTy.getTypeArray(); unsigned SPTag = SPTy.getTag(); @@ -1420,11 +1431,11 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { } DIE *SPDeclDie = SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, SPDeclDie); SPCU->addDie(SPDie); } - + // Pick up abstract subprogram DIE. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { SPDie = new DIE(dwarf::DW_TAG_subprogram); @@ -1459,7 +1470,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. emitDIE will handle + // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); @@ -1480,7 +1491,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); @@ -1493,7 +1504,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges(); - assert (Ranges.empty() == false + assert (Ranges.empty() == false && "DbgScope does not have instruction markers!"); // FIXME : .debug_inlined section specification does not clearly state how @@ -1551,16 +1562,14 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { /// constructVariableDIE - Construct a DIE for the given DbgVariable. DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { - // Get the descriptor. - const DIVariable &VD = DV->getVariable(); - StringRef Name = VD.getName(); + StringRef Name = DV->getName(); if (Name.empty()) return NULL; // Translate tag to proper Dwarf tag. The result variable is dropped for // now. unsigned Tag; - switch (VD.getTag()) { + switch (DV->getTag()) { case dwarf::DW_TAG_return_variable: return NULL; case dwarf::DW_TAG_arg_variable: @@ -1586,18 +1595,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { dwarf::DW_FORM_ref4, AbsDIE); else { addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addSourceLine(VariableDie, &VD); + addSourceLine(VariableDie, DV->getVariable()); // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. 
- if (VD.isBlockByrefVariable()) - addType(VariableDie, getBlockByrefType(VD.getType(), Name)); - else - addType(VariableDie, VD.getType()); + addType(VariableDie, DV->getType()); } - if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial()) + if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { @@ -1623,15 +1627,22 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { const MachineInstr *DVInsn = DVI->second; const MCSymbol *DVLabel = findVariableLabel(DV); bool updated = false; - // FIXME : Handle getNumOperands != 3 + // FIXME : Handle getNumOperands != 3 if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) - updated = - addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0)); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (DVInsn->getOperand(1).isImm() && + TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { + addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); + updated = true; + } else + updated = addRegisterAddress(VariableDie, DVLabel, RegOp); + } else if (DVInsn->getOperand(0).isImm()) updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isFPImm()) - updated = + else if (DVInsn->getOperand(0).isFPImm()) + updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); } else { MachineLocation Location = Asm->getDebugValueLocation(DVInsn); @@ -1651,24 +1662,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { } DV->setDIE(VariableDie); return VariableDie; - } + } // .. else use frame index, if available. - MachineLocation Location; - unsigned FrameReg; - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); int FI = 0; - if (findVariableFrameIndex(DV, &FI)) { - int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - Location.set(FrameReg, Offset); - - if (VD.hasComplexAddress()) - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else - addAddress(VariableDie, dwarf::DW_AT_location, Location); - } + if (findVariableFrameIndex(DV, &FI)) + addVariableAddress(DV, VariableDie, FI); + DV->setDIE(VariableDie); return VariableDie; @@ -1677,7 +1677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { void DwarfDebug::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); unsigned SPTag = SPTy.getTag(); - if (SPTag != dwarf::DW_TAG_subroutine_type) + if (SPTag != dwarf::DW_TAG_subroutine_type) return; DIArray Args = SPTy.getTypeArray(); @@ -1699,7 +1699,7 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; - + DIScope DS(Scope->getScopeNode()); DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) @@ -1718,9 +1718,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { else ScopeDIE = constructLexicalScopeDIE(Scope); if (!ScopeDIE) return NULL; - + // Add variables to scope. 
@@ -1677,7 +1677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
 void DwarfDebug::addPubTypes(DISubprogram SP) {
   DICompositeType SPTy = SP.getType();
   unsigned SPTag = SPTy.getTag();
-  if (SPTag != dwarf::DW_TAG_subroutine_type) 
+  if (SPTag != dwarf::DW_TAG_subroutine_type)
     return;

   DIArray Args = SPTy.getTypeArray();
@@ -1699,7 +1699,7 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
 DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
     return NULL;
-  
+
   DIScope DS(Scope->getScopeNode());
   DIE *ScopeDIE = NULL;
   if (Scope->getInlinedAt())
@@ -1718,9 +1718,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   else
     ScopeDIE = constructLexicalScopeDIE(Scope);
   if (!ScopeDIE) return NULL;
-  
+
   // Add variables to scope.
-  const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+  const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
   for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
     DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
     if (VariableDIE)
@@ -1736,9 +1736,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
       ScopeDIE->addChild(NestedDIE);
   }

-  if (DS.isSubprogram()) 
+  if (DS.isSubprogram())
     addPubTypes(DISubprogram(DS));
-  
+
   return ScopeDIE;
 }

@@ -1748,6 +1748,8 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
 /// maps as well.
 unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
   unsigned DId;
+  assert (DirName.empty() == false && "Invalid directory name!");
+
   StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
   if (DI != DirectoryIdMap.end()) {
     DId = DI->getValue();
@@ -1789,12 +1791,12 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
   TheCU->insertDIE(NS, NDie);
   if (!NS.getName().empty())
     addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
-  addSourceLine(NDie, &NS);
+  addSourceLine(NDie, NS);
   addToContextOwner(NDie, NS.getContext());
   return NDie;
 }

-/// constructCompileUnit - Create new CompileUnit for the given 
+/// constructCompileUnit - Create new CompileUnit for the given
 /// metadata node with tag DW_TAG_compile_unit.
 void DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
@@ -1812,9 +1814,12 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
   // simplifies debug range entries.
   addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
   // DW_AT_stmt_list is a offset of line number information for this
-  // compile unit in debug_line section. This offset is calculated
-  // during endMoudle().
-  addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+  // compile unit in debug_line section.
+  if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
+    addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+             Asm->GetTempSymbol("section_line"));
+  else
+    addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);

   if (!Dir.empty())
     addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1865,64 +1870,98 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
   return I->second;
 }

+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+  DIDerivedType DTy(Ty);
+  if (DTy.Verify())
+    return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+  DIBasicType BTy(Ty);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}

 /// constructGlobalVariableDIE - Construct global variable DIE.
 void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
-  DIGlobalVariable DI_GV(N);
+  DIGlobalVariable GV(N);

   // If debug information is malformed then ignore it.
-  if (DI_GV.Verify() == false)
+  if (GV.Verify() == false)
     return;

   // Check for pre-existence.
   CompileUnit *TheCU = getCompileUnit(N);
-  if (TheCU->getDIE(DI_GV))
+  if (TheCU->getDIE(GV))
     return;

-  DIE *VariableDie = createGlobalVariableDIE(DI_GV);
-  if (!VariableDie)
-    return;
-
-  // Add to map.
-  TheCU->insertDIE(N, VariableDie);
+  DIType GTy = GV.getType();
+  DIE *VariableDIE = new DIE(GV.getTag());

-  // Add to context owner.
-  DIDescriptor GVContext = DI_GV.getContext();
-  // Do not create specification DIE if context is either compile unit
-  // or a subprogram.
+  bool isGlobalVariable = GV.getGlobal() != NULL;

+  // Add name.
+  addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+            GV.getDisplayName());
+  StringRef LinkageName = GV.getLinkageName();
+  if (!LinkageName.empty() && isGlobalVariable)
+    addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+              getRealLinkageName(LinkageName));
+  // Add type.
+  addType(VariableDIE, GTy);
   if (GTy.isCompositeType() && !GTy.getName().empty()
       && !GTy.isForwardDecl()) {
     DIEEntry *Entry = TheCU->getDIEEntry(GTy);
     assert(Entry && "Missing global type!");
     TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
   }
+  // Add scoping info.
+  if (!GV.isLocalToUnit()) {
+    addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    // Expose as global.
+    TheCU->addGlobal(GV.getName(), VariableDIE);
+  }
+  // Add line number info.
+  addSourceLine(VariableDIE, GV);
+  // Add to map.
+  TheCU->insertDIE(N, VariableDIE);
+  // Add to context owner.
+  DIDescriptor GVContext = GV.getContext();
+  addToContextOwner(VariableDIE, GVContext);
+  // Add location.
+  if (isGlobalVariable) {
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(GV.getGlobal()));
+    // Do not create specification DIE if context is either compile unit
+    // or a subprogram.
+    if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+        !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+      // Create specification DIE.
+      DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+      addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+                  dwarf::DW_FORM_ref4, VariableDIE);
+      addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+      addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      TheCU->addDie(VariableSpecDIE);
+    } else {
+      addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+    }
+  } else if (Constant *C = GV.getConstant()) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+      if (isUnsignedDIType(GTy))
+        addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+                CI->getZExtValue());
+      else
+        addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
                CI->getSExtValue());
+    }
+  }
   return;
 }
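[Editor's note] The rewritten constructGlobalVariableDIE now describes storage-less constants by value instead of by address. A hedged illustration of the intended encoding (the source declarations here are hypothetical, not from the patch):

    // static const int answer = -42;     => DW_AT_const_value, DW_FORM_sdata, -42
    // static const unsigned mask = 255;  => DW_AT_const_value, DW_FORM_udata, 255

isUnsignedDIType() recurses through derived types (typedefs, qualifiers) down to the underlying DIBasicType and keys off DW_ATE_unsigned / DW_ATE_unsigned_char, so a typedef of an unsigned type still selects the udata form.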
@@ -1965,7 +2004,7 @@ void DwarfDebug::beginModule(Module *M) {
   DbgFinder.processModule(*M);

   bool HasDebugInfo = false;
-  
+
   // Scan all the compile-units to see if there are any marked as the main unit.
   // if not, we do not generate debug info.
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
@@ -1975,15 +2014,15 @@ void DwarfDebug::beginModule(Module *M) {
       break;
     }
   }
-  
+
   if (!HasDebugInfo) return;

   // Tell MMI that we have debug info.
   MMI->setDebugInfoAvailability(true);
-  
+
   // Emit initial sections.
   EmitSectionLabels();
-  
+
   // Create all the compile unit DIEs.
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
          E = DbgFinder.compile_unit_end(); I != E; ++I)
@@ -1999,6 +2038,11 @@ void DwarfDebug::beginModule(Module *M) {
          E = DbgFinder.global_variable_end(); I != E; ++I)
     constructGlobalVariableDIE(*I);

+  //getOrCreateTypeDIE
+  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
   // Prime section data.
   SectionMap.insert(Asm->getObjFileLowering().getTextSection());

@@ -2025,6 +2069,7 @@ void DwarfDebug::beginModule(Module *M) {
 void DwarfDebug::endModule() {
   if (!FirstCU) return;
   const Module *M = MMI->getModule();
+  DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
   if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
     for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
       if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
@@ -2032,25 +2077,27 @@ void DwarfDebug::endModule() {
       if (!SP.Verify()) continue;

       // Collect info for variables that were optimized out.
+      if (!SP.isDefinition()) continue;
       StringRef FName = SP.getLinkageName();
       if (FName.empty())
         FName = SP.getName();
-      NamedMDNode *NMD = 
+      NamedMDNode *NMD =
         M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
       if (!NMD) continue;
       unsigned E = NMD->getNumOperands();
       if (!E) continue;
       DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+      DeadFnScopeMap[SP] = Scope;
       for (unsigned I = 0; I != E; ++I) {
         DIVariable DV(NMD->getOperand(I));
         if (!DV.Verify()) continue;
         Scope->addVariable(new DbgVariable(DV));
       }
-      
+
       // Construct subprogram DIE and add variables DIEs.
       constructSubprogramDIE(SP);
       DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
-      const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+      const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
       for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
         DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
         if (VariableDIE)
@@ -2099,15 +2146,15 @@ void DwarfDebug::endModule() {
   // Compute DIE offsets and sizes.
   computeSizeAndOffsets();

-  // Emit source line correspondence into a debug line section.
-  emitDebugLines();
-
   // Emit all the DIEs into a debug info section
   emitDebugInfo();

   // Corresponding abbreviations into a abbrev section.
   emitAbbreviations();

+  // Emit source line correspondence into a debug line section.
+  emitDebugLines();
+
   // Emit info into a debug pubnames section.
   emitDebugPubNames();

@@ -2131,7 +2178,9 @@ void DwarfDebug::endModule() {

   // Emit info into a debug str section.
   emitDebugStr();
-  
+
+  // clean up.
+  DeleteContainerSeconds(DeadFnScopeMap);
   for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
          E = CUMap.end(); I != E; ++I)
     delete I->second;
@@ -2139,7 +2188,7 @@ void DwarfDebug::endModule() {
 }

 /// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, 
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
                                               DebugLoc ScopeLoc) {

   DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
@@ -2159,7 +2208,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,

 /// collectVariableInfoFromMMITable - Collect variable information from
 /// side table maintained by MMI.
-void 
+void
 DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
                                    SmallPtrSet<const MDNode *, 16> &Processed) {
   const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
@@ -2177,7 +2226,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
       Scope = ConcreteScopes.lookup(IA);
     if (Scope == 0)
       Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
-    
+
     // If variable scope is not found then skip this variable.
     if (Scope == 0)
       continue;
@@ -2193,7 +2242,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
   }
 }

-/// isDbgValueInUndefinedReg - Return true if debug value, encoded by 
+/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
 /// DBG_VALUE instruction, is in undefined reg.
 static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
   assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
@@ -2202,7 +2251,7 @@ static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
   return false;
 }

-/// isDbgValueInDefinedReg - Return true if debug value, encoded by 
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
 /// DBG_VALUE instruction, is in a defined reg.
 static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
   assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
@@ -2212,10 +2261,10 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
 }

 /// collectVariableInfo - Populate DbgScope entries with variables' info.
-void 
+void
 DwarfDebug::collectVariableInfo(const MachineFunction *MF,
                                 SmallPtrSet<const MDNode *, 16> &Processed) {
-  
+
   /// collection info from MMI table.
   collectVariableInfoFromMMITable(MF, Processed);

@@ -2244,11 +2293,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       continue;

     const MachineInstr *PrevMI = MInsn;
-    for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, 
+    for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
            ME = DbgValues.end(); MI != ME; ++MI) {
-      const MDNode *Var = 
+      const MDNode *Var =
         (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
-      if (Var == DV && isDbgValueInDefinedReg(*MI) && 
+      if (Var == DV && isDbgValueInDefinedReg(*MI) &&
           !PrevMI->isIdenticalTo(*MI))
         MultipleValues.push_back(*MI);
       PrevMI = *MI;
@@ -2269,7 +2318,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
     DbgVariable *RegVar = new DbgVariable(DV);
     Scope->addVariable(RegVar);
     if (!CurFnArg)
-      DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); 
+      DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
     if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
       DbgVariableToDbgInstMap[AbsVar] = MInsn;
       VarToAbstractVarMap[RegVar] = AbsVar;
@@ -2286,26 +2335,39 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
       RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
       const MachineInstr *Begin = NULL;
       const MachineInstr *End = NULL;
-      for (SmallVector<const MachineInstr *, 4>::iterator
-             MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+      for (SmallVector<const MachineInstr *, 4>::iterator
+             MVI = MultipleValues.begin(), MVE = MultipleValues.end();
            MVI != MVE; ++MVI) {
         if (!Begin) {
           Begin = *MVI;
           continue;
-        } 
+        }
         End = *MVI;
         MachineLocation MLoc;
-        MLoc.set(Begin->getOperand(0).getReg(), 0);
+        if (Begin->getNumOperands() == 3) {
+          if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+            MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+        } else
+          MLoc = Asm->getDebugValueLocation(Begin);
+
         const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
         const MCSymbol *SLabel = getLabelBeforeInsn(End);
-        DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+        if (MLoc.getReg())
+          DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+
         Begin = End;
         if (MVI + 1 == MVE) {
           // If End is the last instruction then its value is valid
           // until the end of the funtion.
-          MLoc.set(End->getOperand(0).getReg(), 0);
-          DotDebugLocEntries.
-            push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc));
+          MachineLocation EMLoc;
+          if (End->getNumOperands() == 3) {
+            if (End->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+              EMLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+          } else
+            EMLoc = Asm->getDebugValueLocation(End);
+          if (EMLoc.getReg())
+            DotDebugLocEntries.
+              push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc));
         }
       }
       DotDebugLocEntries.push_back(DotDebugLocEntry());
@@ -2314,11 +2376,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,

   // Collect info for variables that were optimized out.
   const Function *F = MF->getFunction();
   const Module *M = F->getParent();
-  if (NamedMDNode *NMD = 
-      M->getNamedMetadata(Twine("llvm.dbg.lv.", 
+  if (NamedMDNode *NMD =
+      M->getNamedMetadata(Twine("llvm.dbg.lv.",
                                 getRealLinkageName(F->getName())))) {
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+      DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
       if (!DV || !Processed.insert(DV))
         continue;
       DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
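[Editor's note] Both new branches in the hunk above decode the three-operand DBG_VALUE shape (register, immediate offset, variable metadata) and delegate anything else to the target hook Asm->getDebugValueLocation(). A minimal sketch of that decode factored into one place (a hypothetical helper, not part of the patch):

    static MachineLocation decodeDbgValue(const AsmPrinter *Asm,
                                          const MachineInstr *MI) {
      MachineLocation Loc;  // Loc.getReg() == 0 marks an unusable entry.
      if (MI->getNumOperands() == 3) {
        if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
          Loc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
        return Loc;
      }
      return Asm->getDebugValueLocation(MI);  // Target-specific forms.
    }

One caution for readers: the last-entry branch tests End->getOperand(0).isReg() but then builds EMLoc from Begin's operands; that reads like a copy-paste slip in the patch rather than intent, so double-check against a later revision before reusing the logic.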
@@ -2364,7 +2426,7 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
     return;
   }

-  // If location is unknown then use temp label for this DBG_VALUE 
+  // If location is unknown then use temp label for this DBG_VALUE
   // instruction.
   if (MI->isDebugValue()) {
     PrevLabel = MMI->getContext().CreateTempSymbol();
@@ -2393,7 +2455,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
 }

 /// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, 
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
                                           const MDNode *InlinedAt) {
   if (!InlinedAt) {
     DbgScope *WScope = DbgScopeMap.lookup(Scope);
@@ -2402,7 +2464,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
     WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
     DbgScopeMap.insert(std::make_pair(Scope, WScope));
     if (DIDescriptor(Scope).isLexicalBlock()) {
-      DbgScope *Parent = 
+      DbgScope *Parent =
         getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
       WScope->setParent(Parent);
       Parent->addScope(WScope);
@@ -2419,7 +2481,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
         DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
       CurrentFnDbgScope = WScope;
   }
-  
+
   return WScope;
 }

@@ -2448,14 +2510,14 @@ static bool hasValidLocation(LLVMContext &Ctx,
                              const MDNode *&Scope, const MDNode *&InlinedAt) {
   DebugLoc DL = MInsn->getDebugLoc();
   if (DL.isUnknown()) return false;
-  
+
   const MDNode *S = DL.getScope(Ctx);
-  
+
   // There is no need to create another DIE for compile unit. For all
   // other scopes, create one DbgScope now. This will be translated
   // into a scope DIE at the end.
   if (DIScope(S).isCompileUnit()) return false;
-  
+
   Scope = S;
   InlinedAt = DL.getInlinedAt(Ctx);
   return true;
@@ -2490,7 +2552,7 @@ static void calculateDominanceGraph(DbgScope *Scope) {
 }

 /// printDbgScopeInfo - Print DbgScope info for each machine instruction.
-static 
+static
 void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
                        DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap) {
@@ -2507,9 +2569,9 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
       // Check if instruction has valid location information.
       if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
         dbgs() << " [ ";
-        if (InlinedAt) 
+        if (InlinedAt)
           dbgs() << "*";
-        DenseMap<const MachineInstr *, DbgScope *>::iterator DI = 
+        DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
           MI2ScopeMap.find(MInsn);
         if (DI != MI2ScopeMap.end()) {
           DbgScope *S = DI->second;
@@ -2517,7 +2579,7 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
           PrevDFSIn = S->getDFSIn();
         } else
           dbgs() << PrevDFSIn;
-      } else 
+      } else
         dbgs() << " [ x" << PrevDFSIn;
       dbgs() << " ]";
       MInsn->dump();
@@ -2555,26 +2617,26 @@ bool DwarfDebug::extractScopeInformation() {
         PrevMI = MInsn;
         continue;
       }
-      
+
       // If scope has not changed then skip this instruction.
       if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
         PrevMI = MInsn;
         continue;
       }

-      if (RangeBeginMI) {      
-        // If we have alread seen a beginning of a instruction range and 
+      if (RangeBeginMI) {
+        // If we have alread seen a beginning of a instruction range and
         // current instruction scope does not match scope of first instruction
         // in this range then create a new instruction range.
         DbgRange R(RangeBeginMI, PrevMI);
-        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, 
+        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
                                                         PrevInlinedAt);
         MIRanges.push_back(R);
-      } 
+      }

       // This is a beginning of a new instruction range.
       RangeBeginMI = MInsn;
-      
+
       // Reset previous markers.
       PrevMI = MInsn;
       PrevScope = Scope;
@@ -2588,7 +2650,7 @@ bool DwarfDebug::extractScopeInformation() {
     MIRanges.push_back(R);
     MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
   }
-  
+
   if (!CurrentFnDbgScope)
     return false;
@@ -2618,7 +2680,7 @@ bool DwarfDebug::extractScopeInformation() {
   return !DbgScopeMap.empty();
 }

-/// identifyScopeMarkers() - 
+/// identifyScopeMarkers() -
 /// Each DbgScope has first instruction and last instruction to mark beginning
 /// and end of a scope respectively. Create an inverse map that list scopes
 /// starts (and ends) with an instruction. One instruction may start (or end)
@@ -2628,23 +2690,23 @@ void DwarfDebug::identifyScopeMarkers() {
   WorkList.push_back(CurrentFnDbgScope);
   while (!WorkList.empty()) {
     DbgScope *S = WorkList.pop_back_val();
-    
+
     const SmallVector<DbgScope *, 4> &Children = S->getScopes();
-    if (!Children.empty()) 
+    if (!Children.empty())
       for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
              SE = Children.end(); SI != SE; ++SI)
         WorkList.push_back(*SI);

     if (S->isAbstractScope())
       continue;
-    
+
     const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
     if (Ranges.empty())
       continue;

     for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
            RE = Ranges.end(); RI != RE; ++RI) {
-      assert(RI->first && "DbgRange does not have first instruction!");      
-      assert(RI->second && "DbgRange does not have second instruction!");      
+      assert(RI->first && "DbgRange does not have first instruction!");
+      assert(RI->second && "DbgRange does not have second instruction!");
       InsnsEndScopeSet.insert(RI->second);
     }
   }
@@ -2680,20 +2742,23 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   // function.
   DebugLoc FDL = FindFirstDebugLoc(MF);
   if (FDL.isUnknown()) return;
-  
+
   const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
-  
+  const MDNode *TheScope = 0;
+
   DISubprogram SP = getDISubprogram(Scope);
   unsigned Line, Col;
   if (SP.Verify()) {
     Line = SP.getLineNumber();
     Col = 0;
+    TheScope = SP;
   } else {
     Line = FDL.getLine();
     Col = FDL.getCol();
+    TheScope = Scope;
   }
-  
-  recordSourceLine(Line, Col, Scope);
+
+  recordSourceLine(Line, Col, TheScope);

   /// ProcessedArgs - Collection of arguments already processed.
   SmallPtrSet<const MDNode *, 8> ProcessedArgs;
@@ -2710,7 +2775,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
         DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
         if (!DV.Verify()) continue;
         // If DBG_VALUE is for a local variable then it needs a label.
-        if (DV.getTag() != dwarf::DW_TAG_arg_variable 
+        if (DV.getTag() != dwarf::DW_TAG_arg_variable
             && isDbgValueInUndefinedReg(MI) == false)
           InsnNeedsLabel.insert(MI);
         // DBG_VALUE for inlined functions argument needs a label.
@@ -2718,10 +2783,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
                  describes(MF->getFunction()))
           InsnNeedsLabel.insert(MI);
         // DBG_VALUE indicating argument location change needs a label.
-        else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV))
+        else if (isDbgValueInUndefinedReg(MI) == false
+                 && !ProcessedArgs.insert(DV))
           InsnNeedsLabel.insert(MI);
       } else {
-        // If location is unknown then instruction needs a location only if 
+        // If location is unknown then instruction needs a location only if
         // UnknownLocations flag is set.
         if (DL.isUnknown()) {
           if (UnknownLocations && !PrevLoc.isUnknown())
@@ -2730,7 +2796,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
           // Otherwise, instruction needs a location only if it is new location.
           InsnNeedsLabel.insert(MI);
       }
-      
+
       if (!DL.isUnknown() || UnknownLocations)
         PrevLoc = DL;
     }
@@ -2750,7 +2816,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
                                       Asm->getFunctionNumber());
   // Assumes in correct section after the entry point.
   Asm->OutStreamer.EmitLabel(FunctionEndSym);
-  
+
   SmallPtrSet<const MDNode *, 16> ProcessedVars;
   collectVariableInfo(MF, ProcessedVars);
@@ -2764,7 +2830,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
       SectionLineInfos.insert(SectionLineInfos.end(),
                               Lines.begin(), Lines.end());
     }
-    
+
     // Construct abstract scopes.
     for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
            AE = AbstractScopesList.end(); AI != AE; ++AI) {
@@ -2775,11 +2841,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
         if (FName.empty())
           FName = SP.getName();
         const Module *M = MF->getFunction()->getParent();
-        if (NamedMDNode *NMD = 
-            M->getNamedMetadata(Twine("llvm.dbg.lv.", 
+        if (NamedMDNode *NMD =
+            M->getNamedMetadata(Twine("llvm.dbg.lv.",
                                       getRealLinkageName(FName)))) {
           for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-            DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+            DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
             if (!DV || !ProcessedVars.insert(DV))
               continue;
             DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
@@ -2793,9 +2859,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
     }

     DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
-    
+
     if (!DisableFramePointerElim(*MF))
-      addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, 
+      addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
               dwarf::DW_FORM_flag, 1);

@@ -2849,22 +2915,22 @@ const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
   else return I->second;
 }

-/// findDbgScope - Find DbgScope for the debug loc attached with an 
+/// findDbgScope - Find DbgScope for the debug loc attached with an
 /// instruction.
 DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
   DbgScope *Scope = NULL;
-  LLVMContext &Ctx = 
+  LLVMContext &Ctx =
     MInsn->getParent()->getParent()->getFunction()->getContext();
   DebugLoc DL = MInsn->getDebugLoc();

-  if (DL.isUnknown()) 
+  if (DL.isUnknown())
     return Scope;

   if (const MDNode *IA = DL.getInlinedAt(Ctx))
     Scope = ConcreteScopes.lookup(IA);
   if (Scope == 0)
     Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-    
+
   return Scope;
 }

@@ -2872,7 +2938,7 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
 /// recordSourceLine - Register a source line with debug info. Returns the
 /// unique label that was emitted and which provides correspondence to
 /// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, 
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
                                        const MDNode *S) {
   StringRef Dir;
   StringRef Fn;
@@ -2899,16 +2965,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
     Src = GetOrCreateSourceID(Dir, Fn);
   }

-#if 0
-  if (!Lines.empty()) {
-    SrcLineInfo lastSrcLineInfo = Lines.back();
-    // Emitting sequential line records with the same line number (but
-    // different addresses) seems to confuse GDB. Avoid this.
-    if (lastSrcLineInfo.getLine() == Line)
-      return NULL;
-  }
-#endif
-
   MCSymbol *Label = MMI->getContext().CreateTempSymbol();
   Lines.push_back(SrcLineInfo(Line, Col, Src, Label));

@@ -2991,7 +3047,7 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
                                 const char *SymbolStem = 0) {
   Asm->OutStreamer.SwitchSection(Section);
   if (!SymbolStem) return 0;
-  
+
   MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
   Asm->OutStreamer.EmitLabel(TmpSym);
   return TmpSym;
@@ -3008,21 +3064,20 @@ void DwarfDebug::EmitSectionLabels() {
     EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame");
   }

-  DwarfInfoSectionSym = 
+  DwarfInfoSectionSym =
     EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
-  DwarfAbbrevSectionSym = 
+  DwarfAbbrevSectionSym =
     EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
   EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
-  
+
   if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
     EmitSectionSym(Asm, MacroInfo);

-  DwarfDebugLineSectionSym =
-    EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+  EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
   EmitSectionSym(Asm, TLOF.getDwarfLocSection());
   EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
   EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
-  DwarfStrSectionSym = 
+  DwarfStrSectionSym =
     EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
   DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
                                              "debug_range");
@@ -3060,7 +3115,7 @@ void DwarfDebug::emitDIE(DIE *Die) {

     if (Asm->isVerbose())
       Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
-    
+
     switch (Attr) {
     case dwarf::DW_AT_sibling:
       Asm->EmitInt32(Die->getSiblingOffset());
@@ -3075,15 +3130,17 @@ void DwarfDebug::emitDIE(DIE *Die) {
     case dwarf::DW_AT_ranges: {
       // DW_AT_range Value encodes offset in debug_range section.
       DIEInteger *V = cast<DIEInteger>(Values[i]);
-      Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
-                                     V->getValue(),
-                                     DwarfDebugRangeSectionSym,
-                                     4);
-      break;
-    }
-    case dwarf::DW_AT_stmt_list: {
-      Asm->EmitLabelDifference(CurrentLineSectionSym,
-                               DwarfDebugLineSectionSym, 4);
+
+      if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+        Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+                                 V->getValue(),
+                                 4);
+      } else {
+        Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+                                       V->getValue(),
+                                       DwarfDebugRangeSectionSym,
+                                       4);
+      }
       break;
     }
     case dwarf::DW_AT_location: {
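[Editor's note] The DW_AT_ranges case above now picks between two spellings of the same section-relative offset, keyed on the new MCAsmInfo hook doesDwarfUsesLabelOffsetForRanges(). Informally, for an entry Off bytes into .debug_ranges:

    // label-plus-offset form:  emit the value of (debug_range + Off)
    // label-difference form:   emit (debug_range + Off) - debug_range

Both reduce to Off relative to the section start; the split presumably exists because not every object format can relocate both shapes. The old DW_AT_stmt_list case disappears here because constructCompileUnit (earlier in this diff) now emits the attribute directly, which is also why CurrentLineSectionSym and DwarfDebugLineSectionSym are deleted from DwarfDebug.h below.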
@@ -3124,18 +3181,18 @@ void DwarfDebug::emitDebugInfo() {
          E = CUMap.end(); I != E; ++I) {
     CompileUnit *TheCU = I->second;
     DIE *Die = TheCU->getCUDie();
-    
+
     // Emit the compile units header.
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
                                                   TheCU->getID()));
-    
+
     // Emit size of content not including length itself
     unsigned ContentSize = Die->getSize() +
       sizeof(int16_t) + // DWARF version number
       sizeof(int32_t) + // Offset Into Abbrev. Section
       sizeof(int8_t) +  // Pointer Size (in bytes)
       sizeof(int32_t);  // FIXME - extra pad for gdb bug.
-    
+
     Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
     Asm->EmitInt32(ContentSize);
     Asm->OutStreamer.AddComment("DWARF version number");
@@ -3145,7 +3202,7 @@ void DwarfDebug::emitDebugInfo() {
                            DwarfAbbrevSectionSym);
     Asm->OutStreamer.AddComment("Address Size (in bytes)");
     Asm->EmitInt8(Asm->getTargetData().getPointerSize());
-    
+
     emitDIE(Die);
     // FIXME - extra padding for gdb bug.
     Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
@@ -3194,7 +3251,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   // Define last address of section.
   Asm->OutStreamer.AddComment("Extended Op");
   Asm->EmitInt8(0);
-  
+
   Asm->OutStreamer.AddComment("Op size");
   Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
   Asm->OutStreamer.AddComment("DW_LNE_set_address");
@@ -3231,15 +3288,13 @@ void DwarfDebug::emitDebugLines() {
                             Asm->getObjFileLowering().getDwarfLineSection());

   // Construct the section header.
-  CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin");
-  Asm->OutStreamer.EmitLabel(CurrentLineSectionSym);
   Asm->OutStreamer.AddComment("Length of Source Line Info");
   Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
                            Asm->GetTempSymbol("line_begin"), 4);
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));

   Asm->OutStreamer.AddComment("DWARF version number");
-  Asm->EmitInt16(dwarf::DWARF_VERSION); 
+  Asm->EmitInt16(dwarf::DWARF_VERSION);

   Asm->OutStreamer.AddComment("Prolog Length");
   Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
@@ -3294,7 +3349,7 @@ void DwarfDebug::emitDebugLines() {
     const std::string &FN = getSourceFileName(Id.second);
     if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
     Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
-    
+
     Asm->EmitULEB128(Id.first, "Directory #");
     Asm->EmitULEB128(0, "Mod date");
     Asm->EmitULEB128(0, "File size");
@@ -3338,18 +3393,18 @@ void DwarfDebug::emitDebugLines() {
       Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);

       Asm->OutStreamer.AddComment("DW_LNE_set_address");
-      Asm->EmitInt8(dwarf::DW_LNE_set_address); 
+      Asm->EmitInt8(dwarf::DW_LNE_set_address);

       Asm->OutStreamer.AddComment("Location label");
       Asm->OutStreamer.EmitSymbolValue(Label,
                                        Asm->getTargetData().getPointerSize(),
                                        0/*AddrSpace*/);
-      
+
       // If change of source, then switch to the new source.
       if (Source != LineInfo.getSourceID()) {
         Source = LineInfo.getSourceID();
         Asm->OutStreamer.AddComment("DW_LNS_set_file");
-        Asm->EmitInt8(dwarf::DW_LNS_set_file); 
+        Asm->EmitInt8(dwarf::DW_LNS_set_file);
         Asm->EmitULEB128(Source, "New Source");
       }

@@ -3457,7 +3512,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
   Asm->OutStreamer.EmitLabel(DebugFrameBegin);

   Asm->OutStreamer.AddComment("FDE CIE offset");
-  Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"), 
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"),
                          DwarfFrameSectionSym);

   Asm->OutStreamer.AddComment("FDE initial location");
@@ -3466,8 +3521,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
   Asm->OutStreamer.EmitSymbolValue(FuncBeginSym,
                                    Asm->getTargetData().getPointerSize(),
                                    0/*AddrSpace*/);
-  
-  
+
+
   Asm->OutStreamer.AddComment("FDE address range");
   Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number),
                            FuncBeginSym, Asm->getTargetData().getPointerSize());
@@ -3487,41 +3542,41 @@ void DwarfDebug::emitDebugPubNames() {
   // Start the dwarf pubnames section.
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfPubNamesSection());
-  
+
   Asm->OutStreamer.AddComment("Length of Public Names Info");
   Asm->EmitLabelDifference(
     Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
     Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
-  
+
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
                                                 TheCU->getID()));
-  
+
   Asm->OutStreamer.AddComment("DWARF Version");
-  Asm->EmitInt16(dwarf::DWARF_VERSION); 
-  
+  Asm->EmitInt16(dwarf::DWARF_VERSION);
+
   Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-  Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), 
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
                          DwarfInfoSectionSym);
-  
+
   Asm->OutStreamer.AddComment("Compilation Unit Length");
   Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
                            Asm->GetTempSymbol("info_begin", TheCU->getID()),
                            4);
-  
+
   const StringMap<DIE*> &Globals = TheCU->getGlobals();
   for (StringMap<DIE*>::const_iterator
          GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
     const char *Name = GI->getKeyData();
     DIE *Entity = GI->second;
-    
+
     Asm->OutStreamer.AddComment("DIE offset");
     Asm->EmitInt32(Entity->getOffset());
-    
+
     if (Asm->isVerbose())
       Asm->OutStreamer.AddComment("External Name");
     Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
   }
-  
+
   Asm->OutStreamer.AddComment("End Mark");
   Asm->EmitInt32(0);
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
@@ -3540,37 +3595,37 @@ void DwarfDebug::emitDebugPubTypes() {
     Asm->EmitLabelDifference(
       Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
       Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
-    
+
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
                                                   TheCU->getID()));
-    
+
     if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
     Asm->EmitInt16(dwarf::DWARF_VERSION);
-    
+
     Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
     Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
                            DwarfInfoSectionSym);
-    
+
     Asm->OutStreamer.AddComment("Compilation Unit Length");
     Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
                              Asm->GetTempSymbol("info_begin", TheCU->getID()),
                              4);
-    
+
     const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
     for (StringMap<DIE*>::const_iterator
            GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
       const char *Name = GI->getKeyData();
       DIE * Entity = GI->second;
-      
+
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
       Asm->EmitInt32(Entity->getOffset());
-      
+
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
       Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
     }
-    
+
     Asm->OutStreamer.AddComment("End Mark");
-    Asm->EmitInt32(0); 
+    Asm->EmitInt32(0);
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
                                                   TheCU->getID()));
   }
@@ -3581,26 +3636,26 @@ void DwarfDebug::emitDebugPubTypes() {
 void DwarfDebug::emitDebugStr() {
   // Check to see if it is worth the effort.
   if (StringPool.empty()) return;
-  
+
   // Start the dwarf str section.
   Asm->OutStreamer.SwitchSection(
                                 Asm->getObjFileLowering().getDwarfStrSection());

   // Get all of the string pool entries and put them in an array by their ID so
   // we can sort them.
-  SmallVector<std::pair<unsigned, 
+  SmallVector<std::pair<unsigned,
       StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
-  
+
   for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
        I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
     Entries.push_back(std::make_pair(I->second.second, &*I));
-  
+
   array_pod_sort(Entries.begin(), Entries.end());
-  
+
   for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
     // Emit a label for reference from debug information entries.
     Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
-    
+
     // Emit the string itself.
     Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
   }
@@ -3618,8 +3673,8 @@ void DwarfDebug::emitDebugLoc() {
   unsigned char Size = Asm->getTargetData().getPointerSize();
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
-  for (SmallVector<DotDebugLocEntry, 4>::iterator 
-         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); 
+  for (SmallVector<DotDebugLocEntry, 4>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
        I != E; ++I, ++index) {
     DotDebugLocEntry Entry = *I;
     if (Entry.isEmpty()) {
@@ -3631,15 +3686,30 @@ void DwarfDebug::emitDebugLoc() {
       Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
       const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
       unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
-      if (Reg < 32) {
+      if (int Offset = Entry.Loc.getOffset()) {
+        // If the value is at a certain offset from frame register then
+        // use DW_OP_fbreg.
+        unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1;
         Asm->OutStreamer.AddComment("Loc expr size");
-        Asm->EmitInt16(1);
-        Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+        Asm->EmitInt16(1 + OffsetSize);
+        Asm->OutStreamer.AddComment(
+          dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
+        Asm->EmitInt8(dwarf::DW_OP_fbreg);
+        Asm->OutStreamer.AddComment("Offset");
+        Asm->EmitSLEB128(Offset);
       } else {
-        Asm->OutStreamer.AddComment("Loc expr size");
-        Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg));
-        Asm->EmitInt8(dwarf::DW_OP_regx);
-        Asm->EmitULEB128(Reg);
+        if (Reg < 32) {
+          Asm->OutStreamer.AddComment("Loc expr size");
+          Asm->EmitInt16(1);
+          Asm->OutStreamer.AddComment(
+            dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+          Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+        } else {
+          Asm->OutStreamer.AddComment("Loc expr size");
+          Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg));
+          Asm->EmitInt8(dwarf::DW_OP_regx);
+          Asm->EmitULEB128(Reg);
+        }
       }
     }
   }
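[Editor's note] Each .debug_loc entry above is prefixed by a two-byte length that must match the location expression exactly; the three cases emitted by this hunk work out to:

    // DW_OP_fbreg SLEB128(Offset) -> 1 + MCAsmInfo::getSLEB128Size(Offset) bytes
    // DW_OP_reg0 + Reg            -> 1 byte                        (Reg < 32)
    // DW_OP_regx ULEB128(Reg)     -> 1 + MCAsmInfo::getULEB128Size(Reg) bytes

A small oddity worth noting: OffsetSize is computed as "Offset ? getSLEB128Size(Offset) : 1", but that code is only reached when Offset is non-zero, so the ": 1" arm is dead -- harmless, just redundant.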
@@ -3661,7 +3731,7 @@ void DwarfDebug::emitDebugRanges() {
     Asm->getObjFileLowering().getDwarfRangesSection());
   unsigned char Size = Asm->getTargetData().getPointerSize();
   for (SmallVector<const MCSymbol *, 8>::iterator
-         I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); 
+         I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
        I != E; ++I) {
     if (*I)
       Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
@@ -3734,7 +3804,7 @@ void DwarfDebug::emitDebugInlineInfo() {
     if (LName.empty()) {
       Asm->OutStreamer.EmitBytes(Name, 0);
       Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
-    } else 
+    } else
       Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
                              DwarfStrSectionSym);

diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 5a281c8517481..f0ff3bc71699a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -261,7 +261,6 @@ class DwarfDebug {
   MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
   MCSymbol *DwarfDebugLocSectionSym;
-  MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;

   DIEInteger *DIEIntegerOne;
@@ -338,11 +337,11 @@ private:

   /// addSourceLine - Add location information to specified debug information
   /// entry.
-  void addSourceLine(DIE *Die, const DIVariable *V);
-  void addSourceLine(DIE *Die, const DIGlobalVariable *G);
-  void addSourceLine(DIE *Die, const DISubprogram *SP);
-  void addSourceLine(DIE *Die, const DIType *Ty);
-  void addSourceLine(DIE *Die, const DINameSpace *NS);
+  void addSourceLine(DIE *Die, DIVariable V);
+  void addSourceLine(DIE *Die, DIGlobalVariable G);
+  void addSourceLine(DIE *Die, DISubprogram SP);
+  void addSourceLine(DIE *Die, DIType Ty);
+  void addSourceLine(DIE *Die, DINameSpace NS);

   /// addAddress - Add an address attribute to a die based on the location
   /// provided.
@@ -376,6 +375,10 @@ private:
   void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
                             const MachineLocation &Location);

+  /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+  /// on provided frame index.
+  void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI);
+
   /// addToContextOwner - Add Die into the list of its context owner's children.
   void addToContextOwner(DIE *Die, DIDescriptor Context);

@@ -414,14 +417,11 @@ private:
   /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
   DIE *constructEnumTypeDIE(DIEnumerator ETy);

-  /// createGlobalVariableDIE - Create new DIE using GV.
-  DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
-
   /// createMemberDIE - Create new member DIE.
-  DIE *createMemberDIE(const DIDerivedType &DT);
+  DIE *createMemberDIE(DIDerivedType DT);

   /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
+  DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);

   /// getOrCreateDbgScope - Create DbgScope for the scope.
   DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -560,12 +560,6 @@ private:
   /// construct SubprogramDIE - Construct subprogram DIE.
   void constructSubprogramDIE(const MDNode *N);

-  // FIXME: This should go away in favor of complex addresses.
-  /// Find the type the programmer originally declared the variable to be
-  /// and return that type. Obsolete, use GetComplexAddrType instead.
-  ///
-  DIType getBlockByrefType(DIType Ty, std::string Name);
-
   /// recordSourceLine - Register a source line with debug info. Returns the
   /// unique label that was emitted and which provides correspondence to
   /// the source line list.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index c87284083cded..86a368831e0e2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -894,7 +894,7 @@ void DwarfException::EndModule() {
   if (!shouldEmitMovesModule && !shouldEmitTableModule)
     return;

-  const std::vector<const Function *> Personalities = MMI->getPersonalities();
+  const std::vector<const Function*> &Personalities = MMI->getPersonalities();

   for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
     EmitCIE(Personalities[I], I);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 7f98df0d22ea4..cb81aa3c88ce6 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -65,7 +65,7 @@ namespace {
   public:
     static char ID;
     explicit BranchFolderPass(bool defaultEnableTailMerge)
-      : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+      : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}

     virtual bool runOnMachineFunction(MachineFunction &MF);
     virtual const char *getPassName() const { return "Control Flow Optimizer"; }
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ffeff1ee27a64..2ef115dbd2056 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
   LiveIntervalAnalysis.cpp
   LiveStackAnalysis.cpp
   LiveVariables.cpp
+  LocalStackSlotAllocation.cpp
   LowerSubregs.cpp
   MachineBasicBlock.cpp
   MachineCSE.cpp
@@ -42,10 +43,10 @@ add_llvm_library(LLVMCodeGen
   MachineVerifier.cpp
   ObjectCodeEmitter.cpp
   OcamlGC.cpp
-  OptimizeExts.cpp
   OptimizePHIs.cpp
   PHIElimination.cpp
   Passes.cpp
+  PeepholeOptimizer.cpp
   PostRAHazardRecognizer.cpp
   PostRASchedulerList.cpp
   PreAllocSplitting.cpp
@@ -57,6 +58,7 @@ add_llvm_library(LLVMCodeGen
   RegAllocPBQP.cpp
   RegisterCoalescer.cpp
   RegisterScavenging.cpp
+  RenderMachineFunction.cpp
   ScheduleDAG.cpp
   ScheduleDAGEmit.cpp
   ScheduleDAGInstrs.cpp
@@ -67,6 +69,8 @@ add_llvm_library(LLVMCodeGen
   SjLjEHPrepare.cpp
   SlotIndexes.cpp
   Spiller.cpp
+  SplitKit.cpp
+  Splitter.cpp
   StackProtector.cpp
   StackSlotColoring.cpp
   StrongPHIElimination.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 240a7b94fccfd..1b7e08a8b6bb8 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,8 @@ using namespace llvm;

 char CalculateSpillWeights::ID = 0;
-static RegisterPass<CalculateSpillWeights> X("calcspillweights",
-                                             "Calculate spill weights");
+INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
+                "Calculate spill weights", false, false);

 void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
   au.addRequired<LiveIntervals>();
@@ -41,108 +41,184 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
                << "********** Function: "
                << fn.getFunction()->getName() << '\n');

-  LiveIntervals *lis = &getAnalysis<LiveIntervals>();
-  MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
-  const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
-  MachineRegisterInfo *mri = &fn.getRegInfo();
-
-  SmallSet<unsigned, 4> processed;
-  for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
-       mbbi != mbbe; ++mbbi) {
-    MachineBasicBlock* mbb = mbbi;
-    SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
-    MachineLoop* loop = loopInfo->getLoopFor(mbb);
-    unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
-    bool isExiting = loop ? loop->isLoopExiting(mbb) : false;
-
-    for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
-         mii != mie; ++mii) {
-      const MachineInstr *mi = mii;
-      if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue())
-        continue;
-
-      for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
-        const MachineOperand &mopi = mi->getOperand(i);
-        if (!mopi.isReg() || mopi.getReg() == 0)
-          continue;
-        unsigned reg = mopi.getReg();
-        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
-          continue;
-        // Multiple uses of reg by the same instruction. It should not
-        // contribute to spill weight again.
-        if (!processed.insert(reg))
-          continue;
-
-        bool hasDef = mopi.isDef();
-        bool hasUse = !hasDef;
-        for (unsigned j = i+1; j != e; ++j) {
-          const MachineOperand &mopj = mi->getOperand(j);
-          if (!mopj.isReg() || mopj.getReg() != reg)
-            continue;
-          hasDef |= mopj.isDef();
-          hasUse |= mopj.isUse();
-          if (hasDef && hasUse)
-            break;
-        }
-
-        LiveInterval &regInt = lis->getInterval(reg);
-        float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
-        if (hasDef && isExiting) {
-          // Looks like this is a loop count variable update.
-          SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
-          const LiveRange *dlr =
-            lis->getInterval(reg).getLiveRangeContaining(defIdx);
-          if (dlr->end >= mbbEnd)
-            weight *= 3.0F;
-        }
-        regInt.weight += weight;
-      }
-      processed.clear();
-    }
+  LiveIntervals &lis = getAnalysis<LiveIntervals>();
+  VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+  for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+    LiveInterval &li = *I->second;
+    if (TargetRegisterInfo::isVirtualRegister(li.reg))
+      vrai.CalculateWeightAndHint(li);
+  }
+  return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+                         const TargetRegisterInfo &tri,
+                         const MachineRegisterInfo &mri) {
+  unsigned sub, hreg, hsub;
+  if (mi->getOperand(0).getReg() == reg) {
+    sub = mi->getOperand(0).getSubReg();
+    hreg = mi->getOperand(1).getReg();
+    hsub = mi->getOperand(1).getSubReg();
+  } else {
+    sub = mi->getOperand(1).getSubReg();
+    hreg = mi->getOperand(0).getReg();
+    hsub = mi->getOperand(0).getSubReg();
   }
-  for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
-    LiveInterval &li = *I->second;
-    if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
-      // If the live interval length is essentially zero, i.e. in every live
-      // range the use follows def immediately, it doesn't make sense to spill
-      // it and hope it will be easier to allocate for this li.
-      if (isZeroLengthInterval(&li)) {
-        li.weight = HUGE_VALF;
-        continue;
-      }
-
-      bool isLoad = false;
-      SmallVector<LiveInterval*, 4> spillIs;
-      if (lis->isReMaterializable(li, spillIs, isLoad)) {
-        // If all of the definitions of the interval are re-materializable,
-        // it is a preferred candidate for spilling. If none of the defs are
-        // loads, then it's potentially very cheap to re-materialize.
-        // FIXME: this gets much more complicated once we support non-trivial
-        // re-materialization.
-        if (isLoad)
-          li.weight *= 0.9F;
-        else
-          li.weight *= 0.5F;
-      }
-
-      // Slightly prefer live interval that has been assigned a preferred reg.
-      std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
-      if (Hint.first || Hint.second)
-        li.weight *= 1.01F;
-
-      lis->normalizeSpillWeight(li);
+  if (!hreg)
+    return 0;
+
+  if (TargetRegisterInfo::isVirtualRegister(hreg))
+    return sub == hsub ? hreg : 0;
+
+  const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+  // Only allow physreg hints in rc.
+  if (sub == 0)
+    return rc->contains(hreg) ? hreg : 0;
+
+  // reg:sub should match the physreg hreg.
+  return tri.getMatchingSuperReg(hreg, sub, rc);
+}
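[Editor's note] copyHint() answers: if this COPY were coalesced away, which register would li.reg like to be assigned? It normalizes the COPY so reg sits on one side; a virtual register on the other side only counts as a hint when the subregister indices agree, while a physical register must either already be a member of reg's class (no subreg involved) or be translated with getMatchingSuperReg so that hint:sub is the physreg actually copied. The caller's side, as used in CalculateWeightAndHint below, is simply:

    if (unsigned hint = copyHint(mi, li.reg, tri, mri))
      hint_[hint] += weight;  // accumulate per candidate; the best one wins later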
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+  MachineRegisterInfo &mri = mf_.getRegInfo();
+  const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+  MachineBasicBlock *mbb = 0;
+  MachineLoop *loop = 0;
+  unsigned loopDepth = 0;
+  bool isExiting = false;
+  float totalWeight = 0;
+  SmallPtrSet<MachineInstr*, 8> visited;
+
+  // Find the best physreg hist and the best virtreg hint.
+  float bestPhys = 0, bestVirt = 0;
+  unsigned hintPhys = 0, hintVirt = 0;
+
+  // Don't recompute a target specific hint.
+  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+  for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+       MachineInstr *mi = I.skipInstruction();) {
+    if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+      continue;
+    if (!visited.insert(mi))
+      continue;
+
+    // Get loop info for mi.
+    if (mi->getParent() != mbb) {
+      mbb = mi->getParent();
+      loop = loops_.getLoopFor(mbb);
+      loopDepth = loop ? loop->getLoopDepth() : 0;
+      isExiting = loop ? loop->isLoopExiting(mbb) : false;
+    }
+
+    // Calculate instr weight.
+    bool reads, writes;
+    tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+    float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+    // Give extra weight to what looks like a loop induction variable update.
+    if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
+      weight *= 3;
+
+    totalWeight += weight;
+
+    // Get allocation hints from copies.
+    if (noHint || !mi->isCopy())
+      continue;
+    unsigned hint = copyHint(mi, li.reg, tri, mri);
+    if (!hint)
+      continue;
+    float hweight = hint_[hint] += weight;
+    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+      if (hweight > bestPhys && lis_.isAllocatable(hint))
+        bestPhys = hweight, hintPhys = hint;
+    } else {
+      if (hweight > bestVirt)
+        bestVirt = hweight, hintVirt = hint;
     }
   }
-  
-  return false;
+
+  hint_.clear();
+
+  // Always prefer the physreg hint.
+  if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+    mri.setRegAllocationHint(li.reg, 0, hint);
+    // Weakly boost the spill weifght of hinted registers.
+    totalWeight *= 1.01F;
+  }
+
+  // Mark li as unspillable if all live ranges are tiny.
+  if (li.isZeroLength()) {
+    li.markNotSpillable();
+    return;
+  }
+
+  // If all of the definitions of the interval are re-materializable,
+  // it is a preferred candidate for spilling. If none of the defs are
+  // loads, then it's potentially very cheap to re-materialize.
+  // FIXME: this gets much more complicated once we support non-trivial
+  // re-materialization.
+  bool isLoad = false;
+  SmallVector<LiveInterval*, 4> spillIs;
+  if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+    if (isLoad)
+      totalWeight *= 0.9F;
+    else
+      totalWeight *= 0.5F;
+  }
+
+  li.weight = totalWeight;
+  lis_.normalizeSpillWeight(li);
 }
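[Editor's note] Pulling the scattered multipliers in CalculateWeightAndHint together, the resulting spill weight is, informally:

    totalWeight = sum over visited uses/defs of
                    getSpillWeight(writes, reads, loopDepth)  // per instruction
                      * 3 when a write is live out of a loop-exiting block
    totalWeight *= 1.01  if a copy hint was recorded
    totalWeight *= 0.9   if rematerializable via a load (0.5 if without any load)

then normalizeSpillWeight() is applied, and zero-length intervals bypass all of this via markNotSpillable(). (An informal restatement of the code above, not a specification.) Note also that "weifght" and "physreg hist" in the new comments are typos in the committed patch itself, preserved verbatim here.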

-/// Returns true if the given live interval is zero length.
-bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const {
-  for (LiveInterval::Ranges::const_iterator
-         i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
-    if (i->end.getPrevIndex() > i->start)
-      return false;
-  return true;
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+  MachineRegisterInfo &mri = mf_.getRegInfo();
+  const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
+  const TargetRegisterClass *orc = mri.getRegClass(reg);
+  SmallPtrSet<const TargetRegisterClass*,8> rcs;
+
+  for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
+       E = mri.reg_nodbg_end(); I != E; ++I) {
+    // The targets don't have accurate enough regclass descriptions that we can
+    // handle subregs. We need something similar to
+    // TRI::getMatchingSuperRegClass, but returning a super class instead of a
+    // sub class.
+    if (I.getOperand().getSubReg()) {
+      DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
+      return;
+    }
+    if (const TargetRegisterClass *rc =
+                                I->getDesc().getRegClass(I.getOperandNo(), tri))
+      rcs.insert(rc);
+  }
+
+  // If we found no regclass constraints, just leave reg as is.
+  // In theory, we could inflate to the largest superclass of reg's existing
+  // class, but that might not be legal for the current cpu setting.
+  // This could happen if reg is only used by COPY instructions, so we may need
+  // to improve on this.
+  if (rcs.empty()) {
+    return;
+  }
+
+  // Compute the intersection of all classes in rcs.
+  // This ought to be independent of iteration order, but if the target register
+  // classes don't form a proper algebra, it is possible to get different
+  // results. The solution is to make sure the intersection of any two register
+  // classes is also a register class or the null set.
+  const TargetRegisterClass *rc = 0;
+  for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(),
+       E = rcs.end(); I != E; ++I) {
+    rc = rc ? getCommonSubClass(rc, *I) : *I;
+    assert(rc && "Incompatible regclass constraints found");
+  }
+
+  if (rc == orc)
+    return;
+  DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
+        << rc->getName() <<".\n");
+  mri.setRegClass(reg, rc);
 }
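[Editor's note] CalculateRegClass() narrows (or confirms) a virtual register's class by folding getCommonSubClass over every operand constraint, bailing out on subregister uses and on registers with no constraints at all. An illustrative run, using x86 class names purely as an example: if reg currently lives in GR32 but one instruction requires GR32_ABCD, the fold yields GR32_ABCD and setRegClass records it; if two constraints were disjoint, the assert would fire -- which is exactly why the comment insists that pairwise intersections of register classes must themselves be register classes or empty.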
DefIndices[Reg] = InsertPosIndex; @@ -325,6 +330,8 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF), RE = RC->allocation_order_end(MF); R != RE; ++R) { unsigned NewReg = *R; + // Don't consider non-allocatable registers + if (!AllocatableSet.test(NewReg)) continue; // Don't replace a register with itself. if (NewReg == AntiDepReg) continue; // Don't replace a register with one that was recently used to repair @@ -433,7 +440,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // fix that remaining critical edge too. This is a little more involved, // because unlike the most recent register, less recent registers should // still be considered, though only if no other registers are available. - unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {}; + std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0); // Attempt to break anti-dependence edges on the critical path. Walk the // instructions from the bottom up, tracking information about liveness diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 540630083bcc6..0ed7c35b0f0ca 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -46,19 +46,18 @@ class TargetRegisterInfo; /// corresponding value is null. If the register is live but used in /// multiple register classes, the corresponding value is -1 cast to a /// pointer. - const TargetRegisterClass * - Classes[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<const TargetRegisterClass*> Classes; /// RegRefs - Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; /// KillIndices - The index of the most recent kill (preceding bottom-up), /// or ~0u if the register is not live. - unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> KillIndices; /// DefIndices - The index of the most recent complete def (preceding bottom /// up), or ~0u if the register is live. - unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> DefIndices; /// KeepRegs - A set of registers which are live and cannot be changed to /// break anti-dependencies. diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index d69c995b3e037..318d922adebf1 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -36,7 +36,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} + DeadMachineInstructionElim() : MachineFunctionPass(ID) {} private: bool isDead(const MachineInstr *MI) const; @@ -44,9 +44,8 @@ namespace { } char DeadMachineInstructionElim::ID = 0; -static RegisterPass<DeadMachineInstructionElim> -Y("dead-mi-elimination", - "Remove dead machine instructions"); +INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", + "Remove dead machine instructions", false, false); FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); } @@ -81,9 +80,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); - // Compute a bitvector to represent all non-allocatable physregs. 
- BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF); - NonAllocatableRegs.flip(); + // Treat reserved registers as always live. + BitVector ReservedRegs = TRI->getReservedRegs(MF); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -92,9 +90,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) { MachineBasicBlock *MBB = &*I; - // Start out assuming that all non-allocatable registers are live - // out of this block. - LivePhysRegs = NonAllocatableRegs; + // Start out assuming that reserved registers are live out of this block. + LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. if (!MBB->empty() && MBB->back().getDesc().isReturn()) @@ -105,6 +102,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs.set(Reg); } + // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs + // are not live across blocks, but some targets (x86) can have flags live + // out of a block. + // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 01b31b4209318..550fd3e25fb7a 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -25,19 +25,17 @@ #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); -STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; const TargetLowering *TLI; - bool CompileFast; // The eh.exception intrinsic. Function *ExceptionValueIntrinsic; @@ -54,9 +52,8 @@ namespace { // _Unwind_Resume or the target equivalent. Constant *RewindFunction; - // Dominator info is used when turning stack temporaries into registers. + // We both use and preserve dominator info. DominatorTree *DT; - DominanceFrontier *DF; // The function we are running on. Function *F; @@ -65,28 +62,14 @@ namespace { typedef SmallPtrSet<BasicBlock*, 8> BBSet; BBSet LandingPads; - // Stack temporary used to hold eh.exception values. - AllocaInst *ExceptionValueVar; - bool NormalizeLandingPads(); bool LowerUnwinds(); bool MoveExceptionValueCalls(); - bool FinishStackTemporaries(); - bool PromoteStackTemporaries(); Instruction *CreateExceptionValueCall(BasicBlock *BB); - Instruction *CreateValueLoad(BasicBlock *BB); - - /// CreateReadOfExceptionValue - Return the result of the eh.exception - /// intrinsic by calling the intrinsic if in a landing pad, or loading it - /// from the exception value variable otherwise. - Instruction *CreateReadOfExceptionValue(BasicBlock *BB) { - return LandingPads.count(BB) ? 
- CreateExceptionValueCall(BB) : CreateValueLoad(BB); - } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the ".llvm.eh.catch.all.value" call need to convert to using its + /// use the "llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels); @@ -112,69 +95,19 @@ namespace { bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, SmallPtrSet<IntrinsicInst*, 8> &SelCalls); - /// DoMem2RegPromotion - Take an alloca call and promote it from memory to a - /// register. - bool DoMem2RegPromotion(Value *V) { - AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI || !isAllocaPromotable(AI)) return false; - - // Turn the alloca into a register. - std::vector<AllocaInst*> Allocas(1, AI); - PromoteMemToReg(Allocas, *DT, *DF); - return true; - } - - /// PromoteStoreInst - Perform Mem2Reg on a StoreInst. - bool PromoteStoreInst(StoreInst *SI) { - if (!SI || !DT || !DF) return false; - if (DoMem2RegPromotion(SI->getOperand(1))) - return true; - return false; - } - - /// PromoteEHPtrStore - Promote the storing of an EH pointer into a - /// register. This should get rid of the store and subsequent loads. - bool PromoteEHPtrStore(IntrinsicInst *II) { - if (!DT || !DF) return false; - - bool Changed = false; - StoreInst *SI; - - while (1) { - SI = 0; - for (Value::use_iterator - I = II->use_begin(), E = II->use_end(); I != E; ++I) { - SI = dyn_cast<StoreInst>(I); - if (SI) break; - } - - if (!PromoteStoreInst(SI)) - break; - - Changed = true; - } - - return Changed; - } - public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetMachine *tm, bool fast) : - FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()), - CompileFast(fast), + DwarfEHPrepare(const TargetMachine *tm) : + FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), URoR(0), EHCatchAllValue(0), RewindFunction(0) {} virtual bool runOnFunction(Function &Fn); - // getAnalysisUsage - We need dominance frontiers for memory promotion. + // getAnalysisUsage - We need the dominator tree for handling URoR. 
virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (!CompileFast) - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTree>(); AU.addPreserved<DominatorTree>(); - if (!CompileFast) - AU.addRequired<DominanceFrontier>(); - AU.addPreserved<DominanceFrontier>(); } const char *getPassName() const { @@ -186,8 +119,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { - return new DwarfEHPrepare(tm, fast); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { + return new DwarfEHPrepare(tm); } /// HasCatchAllInSelector - Return true if the intrinsic instruction has a @@ -207,7 +140,7 @@ FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels, for (Value::use_iterator I = SelectorIntrinsic->use_begin(), E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *II = cast<IntrinsicInst>(I); + IntrinsicInst *II = cast<IntrinsicInst>(*I); if (II->getParent()->getParent() != F) continue; @@ -225,13 +158,13 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) { for (Value::use_iterator I = URoR->use_begin(), E = URoR->use_end(); I != E; ++I) { - if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + if (InvokeInst *II = dyn_cast<InvokeInst>(*I)) URoRInvokes.insert(II); } } /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the ".llvm.eh.catch.all.value" call need to convert to using its +/// the "llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { if (!EHCatchAllValue) return false; @@ -247,7 +180,7 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { I = Sels.begin(), E = Sels.end(); I != E; ++I) { IntrinsicInst *Sel = *I; - // Index of the ".llvm.eh.catch.all.value" variable. + // Index of the "llvm.eh.catch.all.value" variable. unsigned OpIdx = Sel->getNumArgOperands() - 1; GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx)); if (GV != EHCatchAllValue) continue; @@ -268,10 +201,9 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, SmallPtrSet<PHINode*, 32> SeenPHIs; bool Changed = false; - restart: for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { - Instruction *II = dyn_cast<Instruction>(I); + Instruction *II = dyn_cast<Instruction>(*I); if (!II || II->getParent()->getParent() != F) continue; if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) { @@ -282,11 +214,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, URoRInvoke = true; } else if (CastInst *CI = dyn_cast<CastInst>(II)) { Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls); - } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) { - if (!PromoteStoreInst(SI)) continue; - Changed = true; - SeenPHIs.clear(); - goto restart; // Uses may have changed, restart loop. } else if (PHINode *PN = dyn_cast<PHINode>(II)) { if (SeenPHIs.insert(PN)) // Don't process a PHI node more than once. 
@@ -304,7 +231,7 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, bool DwarfEHPrepare::HandleURoRInvokes() { if (!EHCatchAllValue) { EHCatchAllValue = - F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value"); + F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); if (!EHCatchAllValue) return false; } @@ -318,10 +245,6 @@ bool DwarfEHPrepare::HandleURoRInvokes() { SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; FindAllCleanupSelectors(Sels, CatchAllSels); - if (!DT) - // We require DominatorTree information. - return CleanupSelectors(CatchAllSels); - if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); if (!URoR) return CleanupSelectors(CatchAllSels); @@ -338,7 +261,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { for (SmallPtrSet<InvokeInst*, 32>::iterator UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { const BasicBlock *URoRBB = (*UI)->getParent(); - if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) { + if (DT->dominates(SelBB, URoRBB)) { SelsToConvert.insert(*SI); break; } @@ -360,11 +283,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() { for (Value::use_iterator I = ExceptionValueIntrinsic->use_begin(), E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I); + IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I); if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; - Changed |= PromoteEHPtrStore(EHPtr); - bool URoRInvoke = false; SmallPtrSet<IntrinsicInst*, 8> SelCalls; Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); @@ -532,11 +453,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() { // Add a fallthrough from NewBB to the original landing pad. BranchInst::Create(LPad, NewBB); - // Now update DominatorTree and DominanceFrontier analysis information. - if (DT) - DT->splitBlock(NewBB); - if (DF) - DF->splitBlock(NewBB); + // Now update DominatorTree analysis information. + DT->splitBlock(NewBB); // Remember the newly constructed landing pad. The original landing pad // LPad is no longer a landing pad now that all unwind edges have been @@ -586,7 +504,7 @@ bool DwarfEHPrepare::LowerUnwinds() { // Create the call... CallInst *CI = CallInst::Create(RewindFunction, - CreateReadOfExceptionValue(TI->getParent()), + CreateExceptionValueCall(TI->getParent()), "", TI); CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // ...followed by an UnreachableInst. @@ -602,9 +520,11 @@ bool DwarfEHPrepare::LowerUnwinds() { } /// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from -/// landing pads by replacing calls outside of landing pads with loads from a -/// stack temporary. Move eh.exception calls inside landing pads to the start -/// of the landing pad (optional, but may make things simpler for later passes). +/// landing pads by replacing calls outside of landing pads with direct use of +/// a register holding the appropriate value; this requires adding calls inside +/// all landing pads to initialize the register. Also, move eh.exception calls +/// inside landing pads to the start of the landing pad (optional, but may make +/// things simpler for later passes). bool DwarfEHPrepare::MoveExceptionValueCalls() { // If the eh.exception intrinsic is not declared in the module then there is // nothing to do. Speed up compilation by checking for this common case. 
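The rewritten MoveExceptionValueCalls in the next hunk replaces the old stack-temporary machinery with LLVM's SSAUpdater. A minimal sketch of that pattern, assuming two hypothetical hooks that are not part of this patch (makeValueIn materializes the value in a defining block, much as CreateExceptionValueCall does for landing pads; useValue stands in for rewriting a use):

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    static Value *makeValueIn(BasicBlock *BB); // hypothetical hook
    static void useValue(Value *V);            // hypothetical hook

    static void rewriteAcrossBlocks(Function &F, Instruction *Proto,
                                    SmallPtrSet<BasicBlock*, 8> &DefBlocks) {
      SSAUpdater SSA;
      // Declare the type and name of the value being rewritten.
      SSA.Initialize(Proto->getType(), Proto->getName());

      // Register one definition per defining block (here: per landing pad).
      for (SmallPtrSet<BasicBlock*, 8>::iterator I = DefBlocks.begin(),
           E = DefBlocks.end(); I != E; ++I)
        SSA.AddAvailableValue(*I, makeValueIn(*I));

      // Any other block can now ask for the value that is live-out of its
      // predecessors; SSAUpdater inserts PHI nodes at join points as needed.
      for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
        if (!DefBlocks.count(BB))
          useValue(SSA.GetValueAtEndOfBlock(BB));
    }

Because SSAUpdater computes reaching definitions and places PHI nodes itself, the pass no longer needs DominanceFrontier or the mem2reg promotion it previously ran.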
@@ -614,61 +534,87 @@ bool DwarfEHPrepare::MoveExceptionValueCalls() { bool Changed = false; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) - if (CI->getIntrinsicID() == Intrinsic::eh_exception) { - if (!CI->use_empty()) { - Value *ExceptionValue = CreateReadOfExceptionValue(BB); - if (CI == ExceptionValue) { - // The call was at the start of a landing pad - leave it alone. - assert(LandingPads.count(BB) && - "Created eh.exception call outside landing pad!"); - continue; - } - CI->replaceAllUsesWith(ExceptionValue); - } - CI->eraseFromParent(); - ++NumExceptionValuesMoved; - Changed = true; + // Move calls to eh.exception that are inside a landing pad to the start of + // the landing pad. + for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) { + BasicBlock *LP = *LI; + for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); + II != IE;) + if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { + // Found a call to eh.exception. + if (!EI->use_empty()) { + // If there is already a call to eh.exception at the start of the + // landing pad, then get hold of it; otherwise create such a call. + Value *CallAtStart = CreateExceptionValueCall(LP); + + // If the call was at the start of a landing pad then leave it alone. + if (EI == CallAtStart) + continue; + EI->replaceAllUsesWith(CallAtStart); } + EI->eraseFromParent(); + ++NumExceptionValuesMoved; + Changed = true; + } } - return Changed; -} - -/// FinishStackTemporaries - If we introduced a stack variable to hold the -/// exception value then initialize it in each landing pad. -bool DwarfEHPrepare::FinishStackTemporaries() { - if (!ExceptionValueVar) - // Nothing to do. - return false; + // Look for calls to eh.exception that are not in a landing pad. If one is + // found, then a register that holds the exception value will be created in + // each landing pad, and the SSAUpdater will be used to compute the values + // returned by eh.exception calls outside of landing pads. + SSAUpdater SSA; + + // Remember where we found the eh.exception call, to avoid rescanning earlier + // basic blocks which we already know contain no eh.exception calls. + bool FoundCallOutsideLandingPad = false; + Function::iterator BB = F->begin(); + for (Function::iterator BE = F->end(); BB != BE; ++BB) { + // Skip over landing pads. + if (LandingPads.count(BB)) + continue; - bool Changed = false; + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) + if (isa<EHExceptionInst>(II)) { + SSA.Initialize(II->getType(), II->getName()); + FoundCallOutsideLandingPad = true; + break; + } - // Make sure that there is a store of the exception value at the start of - // each landing pad. - for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) { - Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI); - Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar); - Store->insertAfter(ExceptionValue); - Changed = true; + if (FoundCallOutsideLandingPad) + break; } - return Changed; -} + // If all calls to eh.exception are in landing pads then we are done. + if (!FoundCallOutsideLandingPad) + return Changed; -/// PromoteStackTemporaries - Turn any stack temporaries we introduced into -/// registers if possible. 
-bool DwarfEHPrepare::PromoteStackTemporaries() { - if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { - // Turn the exception temporary into registers and phi nodes if possible. - std::vector<AllocaInst*> Allocas(1, ExceptionValueVar); - PromoteMemToReg(Allocas, *DT, *DF); - return true; + // Add a call to eh.exception at the start of each landing pad, and tell the + // SSAUpdater that this is the value produced by the landing pad. + for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); + LI != LE; ++LI) + SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); + + // Now turn all calls to eh.exception that are not in a landing pad into a use + // of the appropriate register. + for (Function::iterator BE = F->end(); BB != BE; ++BB) { + // Skip over landing pads. + if (LandingPads.count(BB)) + continue; + + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE;) + if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) { + // Found a call to eh.exception, replace it with the value from any + // upstream landing pad(s). + EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); + EI->eraseFromParent(); + ++NumExceptionValuesMoved; + } } - return false; + + return true; } /// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at @@ -691,36 +637,11 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); } -/// CreateValueLoad - Insert a load of the exception value stack variable -/// (creating it if necessary) at the start of the basic block (unless -/// there already is a load, in which case the existing load is returned). -Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { - Instruction *Start = BB->getFirstNonPHIOrDbg(); - // Is this a load of the exception temporary? - if (ExceptionValueVar) - if (LoadInst* LI = dyn_cast<LoadInst>(Start)) - if (LI->getPointerOperand() == ExceptionValueVar) - // Reuse the existing load. - return Start; - - // Create the temporary if we didn't already. - if (!ExceptionValueVar) { - ExceptionValueVar = new AllocaInst(PointerType::getUnqual( - Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin()); - ++NumStackTempsIntroduced; - } - - // Load the value. - return new LoadInst(ExceptionValueVar, "eh.value.load", Start); -} - bool DwarfEHPrepare::runOnFunction(Function &Fn) { bool Changed = false; // Initialize internal state. - DT = getAnalysisIfAvailable<DominatorTree>(); - DF = getAnalysisIfAvailable<DominanceFrontier>(); - ExceptionValueVar = 0; + DT = &getAnalysis<DominatorTree>(); F = &Fn; // Ensure that only unwind edges end at landing pads (a landing pad is a @@ -735,13 +656,6 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) { // Move eh.exception calls to landing pads. Changed |= MoveExceptionValueCalls(); - // Initialize any stack temporaries we introduced. - Changed |= FinishStackTemporaries(); - - // Turn any stack temporaries into registers if possible. 
- if (!CompileFast) - Changed |= PromoteStackTemporaries(); - Changed |= HandleURoRInvokes(); LandingPads.clear(); diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index cb5a8c0eae1d3..fb884c9e8b712 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -22,36 +22,12 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/Support/ELF.h" #include "llvm/System/DataTypes.h" namespace llvm { class GlobalValue; - // Identification Indexes - enum { - EI_MAG0 = 0, - EI_MAG1 = 1, - EI_MAG2 = 2, - EI_MAG3 = 3 - }; - - // File types - enum { - ET_NONE = 0, // No file type - ET_REL = 1, // Relocatable file - ET_EXEC = 2, // Executable file - ET_DYN = 3, // Shared object file - ET_CORE = 4, // Core file - ET_LOPROC = 0xff00, // Beginning of processor-specific codes - ET_HIPROC = 0xffff // Processor-specific - }; - - // Versioning - enum { - EV_NONE = 0, - EV_CURRENT = 1 - }; - /// ELFSym - This struct contains information about each symbol that is /// added to the logical symbol table for the module. This is eventually /// turned into a real symbol table in the file. @@ -108,9 +84,9 @@ namespace llvm { static ELFSym *getExtSym(const char *Ext) { ELFSym *Sym = new ELFSym(); Sym->Source.Ext = Ext; - Sym->setBind(STB_GLOBAL); - Sym->setType(STT_NOTYPE); - Sym->setVisibility(STV_DEFAULT); + Sym->setBind(ELF::STB_GLOBAL); + Sym->setType(ELF::STT_NOTYPE); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SourceType = isExtSym; return Sym; } @@ -118,9 +94,9 @@ namespace llvm { // getSectionSym - Returns an elf symbol to represent an elf section static ELFSym *getSectionSym() { ELFSym *Sym = new ELFSym(); - Sym->setBind(STB_LOCAL); - Sym->setType(STT_SECTION); - Sym->setVisibility(STV_DEFAULT); + Sym->setBind(ELF::STB_LOCAL); + Sym->setType(ELF::STT_SECTION); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SourceType = isOther; return Sym; } @@ -128,9 +104,9 @@ namespace llvm { // getFileSym - Returns an elf symbol to represent the module identifier static ELFSym *getFileSym() { ELFSym *Sym = new ELFSym(); - Sym->setBind(STB_LOCAL); - Sym->setType(STT_FILE); - Sym->setVisibility(STV_DEFAULT); + Sym->setBind(ELF::STB_LOCAL); + Sym->setType(ELF::STT_FILE); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; Sym->SourceType = isOther; return Sym; } @@ -141,8 +117,8 @@ namespace llvm { ELFSym *Sym = new ELFSym(); Sym->Source.GV = GV; Sym->setBind(Bind); - Sym->setType(STT_NOTYPE); - Sym->setVisibility(STV_DEFAULT); + Sym->setType(ELF::STT_NOTYPE); + Sym->setVisibility(ELF::STV_DEFAULT); Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; Sym->SourceType = isGV; return Sym; } @@ -159,35 +135,14 @@ namespace llvm { // Symbol index into the Symbol table unsigned SymTabIdx; - enum { - STB_LOCAL = 0, // Local sym, not visible outside obj file containing def - STB_GLOBAL = 1, // Global sym, visible to all object files being combined - STB_WEAK = 2 // Weak symbol, like global but lower-precedence - }; - - enum { - STT_NOTYPE = 0, // Symbol's type is not specified - STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.) - STT_FUNC = 2, // Symbol is executable code (function, etc.) 
- STT_SECTION = 3, // Symbol refers to a section - STT_FILE = 4 // Local, absolute symbol that refers to a file - }; - - enum { - STV_DEFAULT = 0, // Visibility is specified by binding type - STV_INTERNAL = 1, // Defined by processor supplements - STV_HIDDEN = 2, // Not visible to other components - STV_PROTECTED = 3 // Visible in other components but not preemptable - }; - ELFSym() : SourceType(isOther), NameIdx(0), Value(0), - Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0), + Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0), SymTabIdx(0) {} unsigned getBind() const { return (Info >> 4) & 0xf; } unsigned getType() const { return Info & 0xf; } - bool isLocalBind() const { return getBind() == STB_LOCAL; } - bool isFileType() const { return getType() == STT_FILE; } + bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; } + bool isFileType() const { return getType() == ELF::STT_FILE; } void setBind(unsigned X) { assert(X == (X & 0xF) && "Bind value out of range!"); @@ -222,51 +177,6 @@ namespace llvm { unsigned Align; // sh_addralign - Alignment of section. unsigned EntSize; // sh_entsize - Size of entries in the section - // Section Header Flags - enum { - SHF_WRITE = 1 << 0, // Writable - SHF_ALLOC = 1 << 1, // Mapped into the process addr space - SHF_EXECINSTR = 1 << 2, // Executable - SHF_MERGE = 1 << 4, // Might be merged if equal - SHF_STRINGS = 1 << 5, // Contains null-terminated strings - SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index - SHF_LINK_ORDER = 1 << 7, // Preserve order after combining - SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required - SHF_GROUP = 1 << 9, // Section is a member of a group - SHF_TLS = 1 << 10 // Section holds thread-local data - }; - - // Section Types - enum { - SHT_NULL = 0, // No associated section (inactive entry). - SHT_PROGBITS = 1, // Program-defined contents. - SHT_SYMTAB = 2, // Symbol table. - SHT_STRTAB = 3, // String table. - SHT_RELA = 4, // Relocation entries; explicit addends. - SHT_HASH = 5, // Symbol hash table. - SHT_DYNAMIC = 6, // Information for dynamic linking. - SHT_NOTE = 7, // Information about the file. - SHT_NOBITS = 8, // Data occupies no space in the file. - SHT_REL = 9, // Relocation entries; no explicit addends. - SHT_SHLIB = 10, // Reserved. - SHT_DYNSYM = 11, // Symbol table. - SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type. - SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. - SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. - SHT_HIUSER = 0xffffffff // Highest type reserved for applications. - }; - - // Special section indices. - enum { - SHN_UNDEF = 0, // Undefined, missing, irrelevant - SHN_LORESERVE = 0xff00, // Lowest reserved index - SHN_LOPROC = 0xff00, // Lowest processor-specific index - SHN_HIPROC = 0xff1f, // Highest processor-specific index - SHN_ABS = 0xfff1, // Symbol has absolute value; no relocation - SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables - SHN_HIRESERVE = 0xffff // Highest reserved index - }; - /// SectionIdx - The number of the section in the Section Table. unsigned short SectionIdx; diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 36b0e6514b3a9..3fb087c5ea8b8 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -71,7 +71,7 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) { bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { // Add a symbol to represent the function. 
const Function *F = MF.getFunction(); - ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC, + ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC, EW.getGlobalELFVisibility(F)); FnSym->SectionIdx = ES->SectionIdx; FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index b644ebeb4be53..d14728d8a36c9 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -63,7 +63,7 @@ char ELFWriter::ID = 0; //===----------------------------------------------------------------------===// ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) - : MachineFunctionPass(&ID), O(o), TM(tm), + : MachineFunctionPass(ID), O(o), TM(tm), OutContext(*new MCContext(*TM.getMCAsmInfo())), TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), @@ -129,12 +129,12 @@ bool ELFWriter::doInitialization(Module &M) { ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS] ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA] - ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION] + ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION] ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD] - ElfHdr.emitWord16(ET_REL); // e_type + ElfHdr.emitWord16(ELF::ET_REL); // e_type ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target - ElfHdr.emitWord32(EV_CURRENT); // e_version + ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version ElfHdr.emitWord(0); // e_entry, no entry point in .o file ElfHdr.emitWord(0); // e_phoff, no program header for .o ELFHdr_e_shoff_Offset = ElfHdr.size(); @@ -252,7 +252,7 @@ ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { // is true if the relocation section contains entries with addends. ELFSection &ELFWriter::getRelocSection(ELFSection &S) { unsigned SectionType = TEW->hasRelocationAddend() ? 
- ELFSection::SHT_RELA : ELFSection::SHT_REL; + ELF::SHT_RELA : ELF::SHT_REL; std::string SectionName(".rel"); if (TEW->hasRelocationAddend()) @@ -268,11 +268,11 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { default: llvm_unreachable("unknown visibility type"); case GlobalValue::DefaultVisibility: - return ELFSym::STV_DEFAULT; + return ELF::STV_DEFAULT; case GlobalValue::HiddenVisibility: - return ELFSym::STV_HIDDEN; + return ELF::STV_HIDDEN; case GlobalValue::ProtectedVisibility: - return ELFSym::STV_PROTECTED; + return ELF::STV_PROTECTED; } return 0; } @@ -280,23 +280,23 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { // getGlobalELFBinding - Returns the ELF specific binding type unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { if (GV->hasInternalLinkage()) - return ELFSym::STB_LOCAL; + return ELF::STB_LOCAL; if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) - return ELFSym::STB_WEAK; + return ELF::STB_WEAK; - return ELFSym::STB_GLOBAL; + return ELF::STB_GLOBAL; } // getGlobalELFType - Returns the ELF specific type for a global unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { if (GV->isDeclaration()) - return ELFSym::STT_NOTYPE; + return ELF::STT_NOTYPE; if (isa<Function>(GV)) - return ELFSym::STT_FUNC; + return ELF::STT_FUNC; - return ELFSym::STT_OBJECT; + return ELF::STT_OBJECT; } // IsELFUndefSym - True if the global value must be marked as a symbol @@ -364,7 +364,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) { GblSym->Size = Size; if (S->HasCommonSymbols()) { // Symbol must go to a common section - GblSym->SectionIdx = ELFSection::SHN_COMMON; + GblSym->SectionIdx = ELF::SHN_COMMON; // A new linkonce section is created for each global in the // common section, the default alignment is 1 and the symbol diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index db66ecc6dd83b..b8bac5598ecfc 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -39,6 +39,7 @@ namespace llvm { class raw_ostream; class SectionKind; class MCContext; + class TargetMachine; typedef std::vector<ELFSym*>::iterator ELFSymIter; typedef std::vector<ELFSection*>::iterator ELFSectionIter; @@ -160,29 +161,29 @@ namespace llvm { SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; - SN->Link = ELFSection::SHN_UNDEF; + SN->Link = ELF::SHN_UNDEF; SN->Align = Align; return *SN; } ELFSection &getNonExecStackSection() { - return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); + return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1); } ELFSection &getSymbolTableSection() { - return getSection(".symtab", ELFSection::SHT_SYMTAB, 0); + return getSection(".symtab", ELF::SHT_SYMTAB, 0); } ELFSection &getStringTableSection() { - return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1); + return getSection(".strtab", ELF::SHT_STRTAB, 0, 1); } ELFSection &getSectionHeaderStringTableSection() { - return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1); + return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1); } ELFSection &getNullSection() { - return getSection("", ELFSection::SHT_NULL, 0); + return getSection("", ELF::SHT_NULL, 0); } ELFSection &getDataSection(); diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index ab0a800225311..0f6e882a7be43 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -30,8 +30,8 @@ namespace { raw_ostream &OS; public: - Printer() : FunctionPass(&ID), OS(errs()) {} - explicit Printer(raw_ostream &OS) : 
FunctionPass(&ID), OS(OS) {} + Printer() : FunctionPass(ID), OS(errs()) {} + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} const char *getPassName() const; @@ -55,8 +55,8 @@ namespace { } -static RegisterPass<GCModuleInfo> -X("collector-metadata", "Create Garbage Collector Module Metadata"); +INITIALIZE_PASS(GCModuleInfo, "collector-metadata", + "Create Garbage Collector Module Metadata", false, false); // ----------------------------------------------------------------------------- @@ -70,7 +70,7 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; GCModuleInfo::GCModuleInfo() - : ImmutablePass(&ID) {} + : ImmutablePass(ID) {} GCModuleInfo::~GCModuleInfo() { clear(); @@ -189,7 +189,7 @@ FunctionPass *llvm::createGCInfoDeleter() { return new Deleter(); } -Deleter::Deleter() : FunctionPass(&ID) {} +Deleter::Deleter() : FunctionPass(ID) {} const char *Deleter::getPassName() const { return "Delete Garbage Collector Information"; diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 71506cc6abb9c..719fa194d8da1 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -130,7 +130,7 @@ FunctionPass *llvm::createGCLoweringPass() { char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() - : FunctionPass(&ID) {} + : FunctionPass(ID) {} const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; @@ -260,7 +260,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { bool LowerRd = !S.customReadBarrier(); bool InitRoots = S.initializeRoots(); - SmallVector<AllocaInst*,32> Roots; + SmallVector<AllocaInst*, 32> Roots; bool MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -271,7 +271,8 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcwrite: if (LowerWr) { // Replace a write barrier with a simple store. 
- Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); + Value *St = new StoreInst(CI->getArgOperand(0), + CI->getArgOperand(2), CI); CI->replaceAllUsesWith(St); CI->eraseFromParent(); } @@ -317,7 +318,7 @@ FunctionPass *llvm::createGCMachineCodeAnalysisPass() { char MachineCodeAnalysis::ID = 0; MachineCodeAnalysis::MachineCodeAnalysis() - : MachineFunctionPass(&ID) {} + : MachineFunctionPass(ID) {} const char *MachineCodeAnalysis::getPassName() const { return "Analyze Machine Code For Garbage Collection"; diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 6b445e0b8e0f0..0ea30d7a7929d 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -154,7 +154,7 @@ namespace { int FnNum; public: static char ID; - IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {} + IfConverter() : MachineFunctionPass(ID), FnNum(-1) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "If Converter"; } @@ -230,8 +230,7 @@ namespace { char IfConverter::ID = 0; } -static RegisterPass<IfConverter> -X("if-converter", "If Converter"); +INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false); FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 12adcaa3a22ee..b965bfdcf3b84 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "spiller" #include "Spiller.h" +#include "SplitKit.h" #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -28,8 +30,10 @@ using namespace llvm; namespace { class InlineSpiller : public Spiller { + MachineFunctionPass &pass_; MachineFunction &mf_; LiveIntervals &lis_; + MachineLoopInfo &loops_; VirtRegMap &vrm_; MachineFrameInfo &mfi_; MachineRegisterInfo &mri_; @@ -37,9 +41,11 @@ class InlineSpiller : public Spiller { const TargetRegisterInfo &tri_; const BitVector reserved_; + SplitAnalysis splitAnalysis_; + // Variables that are valid during spill(), but used by multiple methods. 
LiveInterval *li_; - std::vector<LiveInterval*> *newIntervals_; + SmallVectorImpl<LiveInterval*> *newIntervals_; const TargetRegisterClass *rc_; int stackSlot_; const SmallVectorImpl<LiveInterval*> *spillIs_; @@ -53,25 +59,34 @@ class InlineSpiller : public Spiller { ~InlineSpiller() {} public: - InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) - : mf_(*mf), lis_(*lis), vrm_(*vrm), - mfi_(*mf->getFrameInfo()), - mri_(mf->getRegInfo()), - tii_(*mf->getTarget().getInstrInfo()), - tri_(*mf->getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)) {} + InlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) + : pass_(pass), + mf_(mf), + lis_(pass.getAnalysis<LiveIntervals>()), + loops_(pass.getAnalysis<MachineLoopInfo>()), + vrm_(vrm), + mfi_(*mf.getFrameInfo()), + mri_(mf.getRegInfo()), + tii_(*mf.getTarget().getInstrInfo()), + tri_(*mf.getTarget().getRegisterInfo()), + reserved_(tri_.getReservedRegs(mf_)), + splitAnalysis_(mf, lis_, loops_) {} void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex); + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs); private: + bool split(); + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx); bool reMaterializeFor(MachineBasicBlock::iterator MI); void reMaterializeAll(); + bool coalesceStackAccess(MachineInstr *MI); bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops); void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); @@ -80,12 +95,43 @@ private: } namespace llvm { -Spiller *createInlineSpiller(MachineFunction *mf, - LiveIntervals *lis, - const MachineLoopInfo *mli, - VirtRegMap *vrm) { - return new InlineSpiller(mf, lis, vrm); +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + return new InlineSpiller(pass, mf, vrm); +} } + +/// split - try splitting the current interval into pieces that may allocate +/// separately. Return true if successful. +bool InlineSpiller::split() { + splitAnalysis_.analyze(li_); + + if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) { + // We can split, but li_ may be left intact with fewer uses. + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitAroundLoop(loop)) + return true; + } + + // Try splitting into single block intervals. + SplitAnalysis::BlockPtrSet blocks; + if (splitAnalysis_.getMultiUseBlocks(blocks)) { + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitSingleBlocks(blocks)) + return true; + } + + // Try splitting inside a basic block. + if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) { + if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) + .splitInsideBlock(MBB)) + return true; + } + + // We may have been able to split out some uses, but the original interval is + // intact, and it should still be spilled. 
+ return false; } /// allUsesAvailableAt - Return true if all registers used by OrigMI at @@ -237,7 +283,7 @@ void InlineSpiller::reMaterializeAll() { lis_.RemoveMachineInstrFromMaps(DefMI); vrm_.RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); - li_->removeValNo(VNI); + VNI->setIsDefAccurate(false); anyRemoved = true; } @@ -253,8 +299,8 @@ void InlineSpiller::reMaterializeAll() { MachineBasicBlock::iterator NextMI = MI; ++NextMI; if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - SlotIndex NearIdx = lis_.getInstructionIndex(NextMI); - if (li_->liveAt(NearIdx)) + VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI)); + if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) continue; } DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); @@ -262,6 +308,24 @@ void InlineSpiller::reMaterializeAll() { } } +/// If MI is a load or store of stackSlot_, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + int FI = 0; + unsigned reg; + if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && + !(reg = tii_.isStoreToStackSlot(MI, FI))) + return false; + + // We have a stack access. Is it the right register and slot? + if (reg != li_->reg || FI != stackSlot_) + return false; + + DEBUG(dbgs() << "Coalescing stack access: " << *MI); + lis_.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + return true; +} + /// foldMemoryOperand - Try folding stack slot references in Ops into MI. /// Return true on success, and MI will be erased. bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, @@ -323,9 +387,8 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, } void InlineSpiller::spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { DEBUG(dbgs() << "Inline spilling " << *li << "\n"); assert(li->isSpillable() && "Attempting to spill already spilled value."); assert(!li->isStackSlot() && "Trying to spill a stack slot."); @@ -335,13 +398,18 @@ void InlineSpiller::spill(LiveInterval *li, rc_ = mri_.getRegClass(li->reg); spillIs_ = &spillIs; + if (split()) + return; + reMaterializeAll(); // Remat may handle everything. if (li_->empty()) return; - stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + stackSlot_ = vrm_.getStackSlot(li->reg); + if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) + stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); // Iterate over instructions using register. for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); @@ -365,6 +433,10 @@ void InlineSpiller::spill(LiveInterval *li, continue; } + // Stack slot accesses may coalesce away. + if (coalesceStackAccess(MI)) + continue; + // Analyze instruction. bool Reads, Writes; SmallVector<unsigned, 8> Ops; diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 03ae214ae7dab..3852ebaf64253 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -481,7 +481,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { Value *Ops[3]; Ops[0] = CI->getArgOperand(0); // Extend the amount to i32. 
- Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), + Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index bf3137e49536e..36038027b2594 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -85,7 +85,7 @@ static bool getVerboseAsm() { case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); case cl::BOU_TRUE: return true; case cl::BOU_FALSE: return false; - } + } } // Enable or disable FastISel. Both options are needed, because @@ -139,8 +139,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. - // - // FIXME: These are currently leaked. MCCodeEmitter *MCE = 0; if (ShowMCEncoding) MCE = getTarget().createCodeEmitter(*this, *Context); @@ -154,8 +152,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - // - // FIXME: These are currently leaked. MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); if (MCE == 0 || TAB == 0) @@ -180,12 +176,12 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); if (Printer == 0) return true; - + // If successful, createAsmPrinter took ownership of AsmStreamer. AsmStreamer.take(); - + PM.add(Printer); - + // Make sure the code model is set. setCodeModelForStatic(); PM.add(createGCInfoDeleter()); @@ -204,7 +200,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, bool DisableVerify) { // Make sure the code model is set. setCodeModelForJIT(); - + // Add common CodeGen passes. MCContext *Ctx = 0; if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) @@ -216,19 +212,36 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return false; // success! } +/// addPassesToEmitMC - Add passes to the specified pass manager to get +/// machine code emitted with the MCJIT. This method returns true if machine +/// code is not supported. It fills the MCContext Ctx pointer which can be +/// used to build custom MCStreamer. +/// +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, + MCContext *&Ctx, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // Add common CodeGen passes. + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + return true; + // Make sure the code model is set. + setCodeModelForJIT(); + + return false; // success! 
+} + static void printNoVerify(PassManagerBase &PM, const char *Banner) { if (PrintMachineCode) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); } static void printAndVerify(PassManagerBase &PM, - const char *Banner, - bool allowDoubleDefs = false) { + const char *Banner) { if (PrintMachineCode) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM.add(createMachineVerifierPass(allowDoubleDefs)); + PM.add(createMachineVerifierPass()); } /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both @@ -258,6 +271,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + // Turn exception handling constructs into something the code generators can // handle. switch (getMCAsmInfo()->getExceptionHandlingType()) { @@ -269,26 +287,25 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. PM.add(createSjLjEHPass(getTargetLowering())); - PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); - break; + // FALLTHROUGH case ExceptionHandling::Dwarf: - PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None)); + PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: PM.add(createLowerInvokePass(getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); break; } - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None && !DisableCGP) PM.add(createCodeGenPreparePass(getTargetLowering())); PM.add(createStackProtectorPass(getTargetLowering())); + addPreISel(PM, OptLevel); + if (PrintISelInput) PM.add(createPrintFunctionPass("\n\n" "*** Final LLVM Code input to ISel ***\n", @@ -300,13 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createVerifierPass()); // Standard Lower-Level Passes. - + // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo()); PM.add(MMI); OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. - // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*this, OptLevel)); @@ -321,44 +337,43 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, return true; // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After Instruction Selection"); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. if (OptLevel != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + if (OptLevel != CodeGenOpt::None) { // With optimization, dead code should already be eliminated. 
However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After codegen DCE pass"); - PM.add(createOptimizeExtsPass()); + PM.add(createPeepholeOptimizerPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); } // Pre-ra tail duplication. if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); } // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM, "After PreRegAlloc passes", - /* allowDoubleDefs= */ true); + printAndVerify(PM, "After PreRegAlloc passes"); // Perform register allocation. PM.add(createRegisterAllocator(OptLevel)); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index ad5728458062f..59f380ad26414 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -166,6 +166,56 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { return I != begin() && (--I)->end > Start; } + +/// ValNo is dead, remove it. If it is the largest value number, just nuke it +/// (and any other deleted values neighboring it), otherwise mark it as ~1U so +/// it can be nuked later. +void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { + if (ValNo->id == getNumValNums()-1) { + do { + valnos.pop_back(); + } while (!valnos.empty() && valnos.back()->isUnused()); + } else { + ValNo->setIsUnused(true); + } +} + +/// RenumberValues - Renumber all values in order of appearance and delete the +/// remaining unused values. +void LiveInterval::RenumberValues(LiveIntervals &lis) { + SmallPtrSet<VNInfo*, 8> Seen; + bool seenPHIDef = false; + valnos.clear(); + for (const_iterator I = begin(), E = end(); I != E; ++I) { + VNInfo *VNI = I->valno; + if (!Seen.insert(VNI)) + continue; + assert(!VNI->isUnused() && "Unused valno used by live range"); + VNI->id = (unsigned)valnos.size(); + valnos.push_back(VNI); + VNI->setHasPHIKill(false); + if (VNI->isPHIDef()) + seenPHIDef = true; + } + + // Recompute phi kill flags. + if (!seenPHIDef) + return; + for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (!VNI->isPHIDef()) + continue; + const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def); + assert(PHIBB && "No basic block for phi-def"); + for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(), + PE = PHIBB->pred_end(); PI != PE; ++PI) { + VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot()); + if (KVNI) + KVNI->setHasPHIKill(true); + } + } +} + /// extendIntervalEndTo - This method is used when we want to extend the range /// specified by I to end at the specified endpoint. To do this, we should /// merge and eliminate all ranges that this will overlap with. 
The iterator is @@ -175,7 +225,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { VNInfo *ValNo = I->valno; // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = next(I); + Ranges::iterator MergeTo = llvm::next(I); for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } @@ -184,11 +234,11 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { I->end = std::max(NewEnd, prior(MergeTo)->end); // Erase any dead ranges. - ranges.erase(next(I), MergeTo); + ranges.erase(llvm::next(I), MergeTo); // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. - Ranges::iterator Next = next(I); + Ranges::iterator Next = llvm::next(I); if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) { I->end = Next->end; ranges.erase(Next); @@ -227,7 +277,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { MergeTo->end = I->end; } - ranges.erase(next(MergeTo), next(I)); + ranges.erase(llvm::next(MergeTo), llvm::next(I)); return MergeTo; } @@ -280,7 +330,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in +/// isInOneLiveRange - Return true if the range specified is entirely in /// a single LiveRange of the live interval. bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) { Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); @@ -314,16 +364,8 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, break; } if (isDead) { - // Now that ValNo is dead, remove it. If it is the largest value - // number, just nuke it (and any other deleted values neighboring it), - // otherwise mark it as ~1U so it can be nuked later. - if (ValNo->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (!valnos.empty() && valnos.back()->isUnused()); - } else { - ValNo->setIsUnused(true); - } + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); } } @@ -345,7 +387,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, I->end = Start; // Trim the old interval. // Insert the new one. - ranges.insert(next(I), LiveRange(End, OldEnd, ValNo)); + ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo)); } /// removeValNo - Remove all the ranges defined by the specified value#. @@ -359,21 +401,13 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { if (I->valno == ValNo) ranges.erase(I); } while (I != E); - // Now that ValNo is dead, remove it. If it is the largest value - // number, just nuke it (and any other deleted values neighboring it), - // otherwise mark it as ~1U so it can be nuked later. - if (ValNo->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (!valnos.empty() && valnos.back()->isUnused()); - } else { - ValNo->setIsUnused(true); - } + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); } /// getLiveRangeContaining - Return the live range that contains the /// specified index, or null if there is none. 
-LiveInterval::const_iterator +LiveInterval::const_iterator LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const { const_iterator It = std::upper_bound(begin(), end(), Idx); if (It != ranges.begin()) { @@ -385,7 +419,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const { return end(); } -LiveInterval::iterator +LiveInterval::iterator LiveInterval::FindLiveRangeContaining(SlotIndex Idx) { iterator It = std::upper_bound(begin(), end(), Idx); if (It != begin()) { @@ -393,7 +427,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) { if (It->contains(Idx)) return It; } - + return end(); } @@ -425,11 +459,11 @@ VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { /// the intervals are not joinable, this aborts. void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, - const int *RHSValNoAssignments, + const int *RHSValNoAssignments, SmallVector<VNInfo*, 16> &NewVNInfo, MachineRegisterInfo *MRI) { // Determine if any of our live range values are mapped. This is uncommon, so - // we want to avoid the interval scan if not. + // we want to avoid the interval scan if not. bool MustMapCurValNos = false; unsigned NumVals = getNumValNums(); unsigned NumNewVals = NewVNInfo.size(); @@ -449,7 +483,7 @@ void LiveInterval::join(LiveInterval &Other, ++OutIt; for (iterator I = OutIt, E = end(); I != E; ++I) { OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]]; - + // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one LiveRange. This happens when we // have [0,3:0)[4,7:1) and map 0/1 onto the same value #. @@ -460,12 +494,12 @@ void LiveInterval::join(LiveInterval &Other, OutIt->start = I->start; OutIt->end = I->end; } - + // Didn't merge, on to the next one. ++OutIt; } } - + // If we merge some live ranges, chop off the end. ranges.erase(OutIt, end()); } @@ -483,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other, if (VNI) { if (NumValNos >= NumVals) valnos.push_back(VNI); - else + else valnos[NumValNos] = VNI; VNI->id = NumValNos++; // Renumber val#. } @@ -502,25 +536,13 @@ void LiveInterval::join(LiveInterval &Other, } ComputeJoinedWeight(Other); - - // Update regalloc hint if currently there isn't one. - if (TargetRegisterInfo::isVirtualRegister(reg) && - TargetRegisterInfo::isVirtualRegister(Other.reg)) { - std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg); - if (Hint.first == 0 && Hint.second == 0) { - std::pair<unsigned, unsigned> OtherHint = - MRI->getRegAllocationHint(Other.reg); - if (OtherHint.first || OtherHint.second) - MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second); - } - } } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live /// interval as the specified value number. The LiveRanges in RHS are /// allowed to overlap with LiveRanges in the current interval, but only if /// the overlapping LiveRanges have the specified value number. -void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, +void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo) { // TODO: Make this more efficient. iterator InsertPos = begin(); @@ -569,7 +591,7 @@ void LiveInterval::MergeValueInAsValue( // If this trimmed away the whole range, ignore it. if (Start == End) continue; } - + // Map the valno in the other live range to the current live range. 
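The compaction step in LiveInterval::join above relies on one invariant: after value numbers are remapped, two ranges that are neighbors (the first ends exactly where the second starts) and now carry the same value number can be folded into a single range, as in the [0,3:0)[4,7:1) example in its comment. A minimal standalone model of that merge, with a hypothetical Range struct rather than the LLVM classes:

```cpp
#include <cassert>
#include <vector>

// Hypothetical model of a live range: [start, end) tagged with a value number.
struct Range { unsigned start, end, valno; };

// Sketch of the post-remap compaction: ranges that touch and carry the same
// value number collapse into one; everything else is kept as-is.
std::vector<Range> coalesce(const std::vector<Range> &in) {
  std::vector<Range> out;
  for (const Range &r : in) {
    if (!out.empty() && out.back().valno == r.valno &&
        out.back().end == r.start)
      out.back().end = r.end;  // extend the previous range
    else
      out.push_back(r);        // keep as a separate range
  }
  return out;
}

int main() {
  // [0,3) and [3,7) mapped onto the same value number collapse to [0,7).
  std::vector<Range> v = coalesce({{0, 3, 0}, {3, 7, 0}, {9, 12, 1}});
  assert(v.size() == 2 && v[0].end == 7);
  return 0;
}
```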
IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP); } @@ -584,18 +606,10 @@ void LiveInterval::MergeValueInAsValue( if (I->valno == V1) { isDead = false; break; - } - if (isDead) { - // Now that V1 is dead, remove it. If it is the largest value number, - // just nuke it (and any other deleted values neighboring it), otherwise - // mark it as ~1U so it can be nuked later. - if (V1->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (!valnos.empty() && valnos.back()->isUnused()); - } else { - V1->setIsUnused(true); } + if (isDead) { + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); } } } @@ -609,7 +623,7 @@ void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, const LiveInterval &Clobbers, VNInfo::Allocator &VNInfoAllocator) { if (Clobbers.empty()) return; - + DenseMap<VNInfo*, VNInfo*> ValNoMaps; VNInfo *UnusedValNo = 0; iterator IP = begin(); @@ -679,10 +693,10 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_, // for unknown values, use it. VNInfo *ClobberValNo = getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); - + iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); - + // If the start of this range overlaps with an existing liverange, trim it. if (IP != begin() && IP[-1].end > Start) { Start = IP[-1].end; @@ -695,7 +709,7 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_, // If this trimmed away the whole range, ignore it. if (Start == End) return; } - + // Insert the clobber interval. addRangeFrom(LiveRange(Start, End, ClobberValNo), IP); } @@ -722,7 +736,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { for (iterator I = begin(); I != end(); ) { iterator LR = I++; if (LR->valno != V1) continue; // Not a V1 LiveRange. - + // Okay, we found a V1 live range. If it had a previous, touching, V2 live // range, extend it. if (LR != begin()) { @@ -736,11 +750,11 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { LR = Prev; } } - + // Okay, now we have a V1 or V2 live range that is maximally merged forward. // Ensure that it is a V2 live-range. LR->valno = V2; - + // If we can merge it into later V2 live ranges, do so now. We ignore any // following V1 live ranges, as they will be merged in subsequent iterations // of the loop. @@ -752,18 +766,10 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } } - - // Now that V1 is dead, remove it. If it is the largest value number, just - // nuke it (and any other deleted values neighboring it), otherwise mark it as - // ~1U so it can be nuked later. - if (V1->id == getNumValNums()-1) { - do { - valnos.pop_back(); - } while (valnos.back()->isUnused()); - } else { - V1->setIsUnused(true); - } - + + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); + return V2; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 194d03d8dbfb5..2726fc337539c 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -47,7 +47,7 @@ using namespace llvm; // Hidden options for help debugging. 
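The mechanical next(I) to llvm::next(I) edits sprinkled through the LiveInterval.cpp hunks above are disambiguation, not a behavior change: once a standard library ships C++0x's std::next, an unqualified call on a container iterator can find std::next through argument-dependent lookup on some implementations, making the call ambiguous. A compilable illustration, with a simplified stand-in for the helper in llvm/ADT/STLExtras.h:

```cpp
#include <iterator>
#include <vector>

namespace llvm {
// Simplified version of the helper from llvm/ADT/STLExtras.h.
template <typename ItTy>
ItTy next(ItTy it) { return ++it; }
}

int main() {
  std::vector<int> v{1, 2, 3};
  // An unqualified next(v.begin()) may become ambiguous once std::next
  // exists, depending on the iterator's associated namespaces. Qualifying
  // the call picks one candidate explicitly.
  std::vector<int>::iterator i = llvm::next(v.begin());
  return *i == 2 ? 0 : 1;
}
```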
-static cl::opt<bool> DisableReMat("disable-rematerialization", +static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); @@ -55,22 +55,24 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; -static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis"); +INITIALIZE_PASS(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false); void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<LiveVariables>(); AU.addRequired<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); + AU.addPreserved<LiveVariables>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); - + if (!StrongPHIElim) { AU.addPreservedID(PHIEliminationID); AU.addRequiredID(PHIEliminationID); } - + AU.addRequiredID(TwoAddressInstructionPassID); AU.addPreserved<ProcessImplicitDefs>(); AU.addRequired<ProcessImplicitDefs>(); @@ -84,7 +86,7 @@ void LiveIntervals::releaseMemory() { for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(), E = r2iMap_.end(); I != E; ++I) delete I->second; - + r2iMap_.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. @@ -188,10 +190,6 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, const MachineInstr &MI = *I; // Allow copies to and from li.reg - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - if (SrcReg == li.reg || DstReg == li.reg) - continue; if (MI.isCopy()) if (MI.getOperand(0).getReg() == li.reg || MI.getOperand(1).getReg() == li.reg) @@ -278,7 +276,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { /// isPartialRedef - Return true if the specified def at the specific index is /// partially re-defining the specified live interval. A common case of this is -/// a definition of the sub-register. +/// a definition of the sub-register. bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, LiveInterval &interval) { if (!MO.getSubReg() || MO.isEarlyClobber()) @@ -324,9 +322,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, mi->addRegisterDefined(interval.reg); MachineInstr *CopyMI = NULL; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isCopyLike() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) { + if (mi->isCopyLike()) { CopyMI = mi; } @@ -420,8 +416,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // def-and-use register operand. // It may also be partial redef like this: - // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 - // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 + // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0 + // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0 bool PartReDef = isPartialRedef(MIIdx, MO, interval); if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { // If this is a two-address definition, then we have already processed @@ -454,11 +450,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, OldValNo->setCopy(0); // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ... 
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (PartReDef && (mi->isCopyLike() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))) + if (PartReDef && mi->isCopyLike()) OldValNo->setCopy(&*mi); - + // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); DEBUG(dbgs() << " replace range with " << LR); @@ -485,12 +479,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isCopyLike() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (mi->isCopyLike()) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); - + SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); @@ -567,10 +559,10 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, goto exit; } } - + baseIndex = baseIndex.getNextIndex(); } - + // The only case we should have a dead physreg here without a killing or // instruction where we know it's dead is if it is live-in to the function // and never used. Another possible case is the implicit use of the @@ -602,9 +594,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, getOrCreateInterval(MO.getReg())); else if (allocatableRegs_[MO.getReg()]) { MachineInstr *CopyMI = NULL; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (MI->isCopyLike() || - tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (MI->isCopyLike()) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); @@ -696,7 +686,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, /// registers. for some ordering of the machine instructions [1,N] a /// live interval is an interval [i, j) where 1 <= i <= j < N for /// which a variable is live -void LiveIntervals::computeIntervals() { +void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); @@ -723,11 +713,11 @@ void LiveIntervals::computeIntervals() { handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS), true); } - + // Skip over empty initial indices. if (getInstructionFromIndex(MIIndex) == 0) MIIndex = indexes_->getNextNonNullIndex(MIIndex); - + for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); @@ -746,7 +736,7 @@ void LiveIntervals::computeIntervals() { else if (MO.isUndef()) UndefUses.push_back(MO.getReg()); } - + // Move to the next instr slot. MIIndex = indexes_->getNextNonNullIndex(MIIndex); } @@ -791,7 +781,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, unsigned Reg = MO.getReg(); if (Reg == 0 || Reg == li.reg) continue; - + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !allocatableRegs_[Reg]) continue; @@ -810,7 +800,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// which reaches the given instruction also reaches the specified use index. 
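A recurring pattern in these hunks is dropping the TargetInstrInfo::isMoveInstr query (with its four out-parameters) and keeping only MachineInstr::isCopyLike(). The sketch below models the shape of that simplification with a hypothetical opcode enum, not the LLVM MachineInstr API: once every target-specific move is lowered to the generic COPY form, "is this a copy?" becomes a plain opcode predicate.

```cpp
#include <cassert>

// Hypothetical opcode set standing in for the target-independent copy
// instructions that a predicate like MachineInstr::isCopyLike() covers.
enum class Opcode { Copy, SubregToReg, RegSequence, Add, Load };

struct Instr { Opcode op; };

// With a uniform COPY representation, no target hook is needed: the check
// is a direct property of the instruction itself.
bool isCopyLike(const Instr &mi) {
  return mi.op == Opcode::Copy || mi.op == Opcode::SubregToReg ||
         mi.op == Opcode::RegSequence;
}

int main() {
  assert(isCopyLike(Instr{Opcode::Copy}));
  assert(!isCopyLike(Instr{Opcode::Add}));
  return 0;
}
```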
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, SlotIndex UseIdx) const { - SlotIndex Index = getInstructionIndex(MI); + SlotIndex Index = getInstructionIndex(MI); VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); return UI != li.end() && UI->valno == ValNo; @@ -915,7 +905,7 @@ static bool FilterFoldedOps(MachineInstr *MI, } return false; } - + /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from /// slot / to reg or any rematerialized load into ith operand of specified @@ -1035,7 +1025,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, /// for addIntervalsForSpills to rewrite uses / defs for the given live range. bool LiveIntervals:: rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, + bool TrySplit, SlotIndex index, SlotIndex end, MachineInstr *MI, MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, unsigned Slot, int LdSlot, @@ -1094,7 +1084,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // keep the src/dst regs pinned. // // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. + // create the spill interval with the appropriate range. SmallVector<unsigned, 2> Ops; tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); @@ -1156,7 +1146,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (mopj.isImplicit()) rewriteImplicitOps(li, MI, NewVReg, vrm); } - + if (CreatedNewVReg) { if (DefIsReMat) { vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); @@ -1696,7 +1686,7 @@ addIntervalsForSpills(const LiveInterval &li, if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) Slot = vrm.assignVirt2StackSlot(li.reg); - + // This case only occurs when the prealloc splitter has already assigned // a stack slot to this vreg. else @@ -1753,7 +1743,7 @@ addIntervalsForSpills(const LiveInterval &li, Ops.push_back(j); if (MO.isDef()) continue; - if (isReMat || + if (isReMat || (!FoundUse && !alsoFoldARestore(Id, index, VReg, RestoreMBBs, RestoreIdxes))) { // MI has two-address uses of the same register. If the use @@ -1866,7 +1856,6 @@ addIntervalsForSpills(const LiveInterval &li, for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { LiveInterval *LI = NewLIs[i]; if (!LI->empty()) { - LI->weight /= SlotIndex::NUM * getApproximateInstructionCount(*LI); if (!AddedKill.count(LI)) { LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; SlotIndex LastUseIdx = LR->end.getBaseIndex(); @@ -1899,7 +1888,7 @@ bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { /// getRepresentativeReg - Find the largest super register of the specified /// physical register. unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. + // Find the largest super-register that is allocatable. 
 unsigned BestReg = Reg;
 for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
 unsigned SuperReg = *AS;
@@ -2013,7 +2002,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
 SlotIndex(getInstructionIndex(startInst).getDefIndex()),
 getMBBEndIdx(startInst->getParent()), VN);
 Interval.addRange(LR);
-
+
 return LR;
 }
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 709e2c6d5ca7e..b5c385f772394 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,7 +25,8 @@ using namespace llvm;
 char LiveStacks::ID = 0;
-static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+INITIALIZE_PASS(LiveStacks, "livestacks",
+                "Live Stack Slot Analysis", false, false);
 void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
 AU.setPreservesAll();
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 41b891d30f23b..375307b973a97 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -42,7 +42,8 @@ using namespace llvm;
 char LiveVariables::ID = 0;
-static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+INITIALIZE_PASS(LiveVariables, "livevars",
+                "Live Variable Analysis", false, false);
 void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -482,21 +483,6 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
 }
 }
-namespace {
-  struct RegSorter {
-    const TargetRegisterInfo *TRI;
-
-    RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { }
-    bool operator()(unsigned A, unsigned B) {
-      if (TRI->isSubRegister(A, B))
-        return true;
-      else if (TRI->isSubRegister(B, A))
-        return false;
-      return A < B;
-    }
-  };
-}
-
 bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
 MF = &mf;
 MRI = &mf.getRegInfo();
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 0000000000000..7e366f0ceec02
--- /dev/null
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,354 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
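Before the new file continues, its premise is worth making concrete: lay locals out at fixed offsets within one block, then walk the frame references in offset order and materialize a new base register only when no existing base is in range. A toy model of the greedy reuse, under an assumed encodable offset of [-256, 255] (an ARM-like constraint chosen purely for illustration):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Toy constraint: an instruction can encode a signed offset in [-256, 255]
// from some base register. These bounds are an assumption for illustration,
// not any particular target's rules.
bool offsetIsLegal(int64_t off) { return off >= -256 && off <= 255; }

int main() {
  // Local offsets of frame references, sorted (as the pass sorts FrameRefs).
  std::vector<int64_t> refs = {0, 40, 200, 480, 520, 1200};

  std::vector<int64_t> baseOffsets;  // where each virtual base points
  for (int64_t r : refs) {
    bool reused = false;
    for (int64_t b : baseOffsets)
      if (offsetIsLegal(r - b)) { reused = true; break; }
    if (!reused)
      baseOffsets.push_back(r);      // materialize a new base here
  }
  // refs 0..200 share base 0; 480 and 520 share base 480; 1200 needs its own.
  std::cout << "bases needed: " << baseOffsets.size() << "\n";  // prints 3
  return 0;
}
```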
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "localstackalloc" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetFrameInfo.h" + +using namespace llvm; + +STATISTIC(NumAllocations, "Number of frame indices allocated into local block"); +STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); +STATISTIC(NumReplacements, "Number of frame indices references replaced"); + +namespace { + class FrameRef { + MachineBasicBlock::iterator MI; // Instr referencing the frame + int64_t LocalOffset; // Local offset of the frame idx referenced + public: + FrameRef(MachineBasicBlock::iterator I, int64_t Offset) : + MI(I), LocalOffset(Offset) {} + bool operator<(const FrameRef &RHS) const { + return LocalOffset < RHS.LocalOffset; + } + MachineBasicBlock::iterator getMachineInstr() { return MI; } + }; + + class LocalStackSlotPass: public MachineFunctionPass { + SmallVector<int64_t,16> LocalOffsets; + + void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, + bool StackGrowsDown, unsigned &MaxAlign); + void calculateFrameObjectOffsets(MachineFunction &Fn); + bool insertFrameReferenceRegisters(MachineFunction &Fn); + public: + static char ID; // Pass identification, replacement for typeid + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } + bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + const char *getPassName() const { + return "Local Stack Slot Allocation"; + } + + private: + }; +} // end anonymous namespace + +char LocalStackSlotPass::ID = 0; + +FunctionPass *llvm::createLocalStackSlotAllocationPass() { + return new LocalStackSlotPass(); +} + +bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + + // If the target doesn't want/need this pass, or if there are no locals + // to consider, early exit. + if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + return true; + + // Make sure we have enough space to store the local offsets. + LocalOffsets.resize(MFI->getObjectIndexEnd()); + + // Lay out the local blob. + calculateFrameObjectOffsets(MF); + + // Insert virtual base registers to resolve frame index references. + bool UsedBaseRegs = insertFrameReferenceRegisters(MF); + + // Tell MFI whether any base registers were allocated. PEI will only + // want to use the local block allocations from this pass if there were any. 
+ // Otherwise, PEI can do a bit better job of getting the alignment right + // without a hole at the start since it knows the alignment of the stack + // at the start of local allocation, and this pass doesn't. + MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + + return true; +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, + int FrameIdx, int64_t &Offset, + bool StackGrowsDown, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; + DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " + << LocalOffset << "\n"); + // Keep the offset available for base register allocation + LocalOffsets[FrameIdx] = LocalOffset; + // And tell MFI about it for PEI to use later + MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + + if (!StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + ++NumAllocations; +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + int64_t Offset = 0; + unsigned MaxAlign = 0; + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + StackGrowsDown, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. 
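The Offset = (Offset + Align - 1) / Align * Align line in AdjustStackOffset above is the usual round-up-to-alignment idiom. A few worked cases, as a self-contained check:

```cpp
#include <cassert>
#include <cstdint>

// Round offset up to the next multiple of align, as AdjustStackOffset does.
// Stack alignments are powers of two, though the divide/multiply form works
// for any positive align.
int64_t alignTo(int64_t offset, int64_t align) {
  return (offset + align - 1) / align * align;
}

int main() {
  assert(alignTo(0, 8) == 0);
  assert(alignTo(1, 8) == 8);     // 1 byte used, next 8-byte slot is at 8
  assert(alignTo(17, 16) == 32);  // already past 16, bump to 32
  assert(alignTo(32, 16) == 32);  // exact multiples are unchanged
  return 0;
}
```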
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    if (MFI->getStackProtectorIndex() == (int)i)
+      continue;
+    if (LargeStackObjs.count(i))
+      continue;
+
+    AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+  }
+
+  // Remember how big this blob of stack space is.
+  MFI->setLocalFrameSize(Offset);
+  MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+                       std::pair<unsigned, int64_t> &RegOffset,
+                       int64_t FrameSizeAdjust,
+                       int64_t LocalFrameOffset,
+                       const MachineInstr *MI,
+                       const TargetRegisterInfo *TRI) {
+  unsigned e = Regs.size();
+  for (unsigned i = 0; i < e; ++i) {
+    RegOffset = Regs[i];
+    // Check if the relative offset from where the base register points to
+    // the target address is in range for the instruction.
+    int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+    if (TRI->isFrameOffsetLegal(MI, Offset))
+      return true;
+  }
+  return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+  // Scan the function's instructions looking for frame index references.
+  // For each, ask the target if it wants a virtual base register for it
+  // based on what we can tell it about where the local will end up in the
+  // stack frame. If it wants one, re-use a suitable one we've previously
+  // allocated, or if there isn't one that fits the bill, allocate a new one
+  // and ask the target to create a defining instruction for it.
+  bool UsedBaseReg = false;
+
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+  const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+  bool StackGrowsDown =
+    TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+  MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+
+  // Collect all of the instructions in the block that reference
+  // a frame index. Also store the frame index referenced to ease later
+  // lookup. (For any insn that has more than one FI reference, we arbitrarily
+  // choose the first one).
+  SmallVector<FrameRef, 64> FrameReferenceInsns;
+  // A base register definition is a register+offset pair.
+  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+      MachineInstr *MI = I;
+      // Debug value instructions can't be out of range, so they don't need
+      // any updates.
+      if (MI->isDebugValue())
+        continue;
+      // For now, allocate the base register(s) within the basic block
+      // where they're used, and don't try to keep them around outside
+      // of that. It may be beneficial to try sharing them more broadly
+      // than that, but the increased register pressure makes that a
+      // tricky thing to balance. Investigate if re-materializing these
+      // becomes an issue.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        // Consider replacing all frame index operands that reference
+        // an object allocated in the local block.
+        if (MI->getOperand(i).isFI()) {
+          // Don't try this with values not in the local block.
+          if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+            break;
+          FrameReferenceInsns.
+            push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+          break;
+        }
+      }
+    }
+  }
+  // Sort the frame references by local offset.
+  array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+
+  // Loop through the frame references and allocate for them as necessary.
+  for (int ref = 0, e = FrameReferenceInsns.size(); ref < e; ++ref) {
+    MachineBasicBlock::iterator I =
+      FrameReferenceInsns[ref].getMachineInstr();
+    MachineInstr *MI = I;
+    for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+      // Consider replacing all frame index operands that reference
+      // an object allocated in the local block.
+      if (MI->getOperand(idx).isFI()) {
+        int FrameIdx = MI->getOperand(idx).getIndex();
+
+        assert(MFI->isObjectPreAllocated(FrameIdx) &&
+               "Only pre-allocated locals expected!");
+
+        DEBUG(dbgs() << "Considering: " << *MI);
+        if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+          unsigned BaseReg = 0;
+          int64_t Offset = 0;
+          int64_t FrameSizeAdjust =
+            StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+          DEBUG(dbgs() << "  Replacing FI in: " << *MI);
+
+          // If we have a suitable base register available, use it; otherwise
+          // create a new one. Note that any offset encoded in the
+          // instruction itself will be taken into account by the target,
+          // so we don't have to adjust for it here when reusing a base
+          // register.
+          std::pair<unsigned, int64_t> RegOffset;
+          if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+                                     FrameSizeAdjust,
+                                     LocalOffsets[FrameIdx],
+                                     MI, TRI)) {
+            DEBUG(dbgs() << "  Reusing base register " <<
+                  RegOffset.first << "\n");
+            // We found a register to reuse.
+            BaseReg = RegOffset.first;
+            Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+              RegOffset.second;
+          } else {
+            // No previously defined register was in range, so create a
+            // new one.
+            int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+            const TargetRegisterClass *RC = TRI->getPointerRegClass();
+            BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+            DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
+                  " at frame local offset " <<
+                  LocalOffsets[FrameIdx] + InstrOffset << "\n");
+            // Tell the target to insert the instruction to initialize
+            // the base register.
+            TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
+                                              FrameIdx, InstrOffset);
+
+            // The base register already includes any offset specified
+            // by the instruction, so account for that so it doesn't get
+            // applied twice.
+            Offset = -InstrOffset;
+
+            int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+              InstrOffset;
+            BaseRegisters.push_back(
+              std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+            ++NumBaseRegisters;
+            UsedBaseReg = true;
+          }
+          assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+          // Modify the instruction to use the new base register rather
+          // than the frame index operand.
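Before the resolveFrameIndex call that follows, the two offset computations above deserve a concrete walk-through with made-up numbers: when reusing a base register, the operand offset is the distance from where that base points; when materializing a new base, the operand only needs to cancel the offset the instruction already encodes.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Worked example of the offset bookkeeping (all values hypothetical).
  int64_t FrameSizeAdjust = 64;  // stack grows down: local block size
  int64_t LocalOffset     = -40; // where the object sits in the local block
  int64_t InstrOffset     = 4;   // offset already encoded in the instruction

  // Reuse case: the resolved operand offset is the distance from where the
  // existing base register points to the object's address.
  int64_t BaseRegTarget = 16;    // RegOffset.second of an existing base
  int64_t Reused = FrameSizeAdjust + LocalOffset - BaseRegTarget;
  assert(Reused == 8);

  // Materialize case: the new base is defined to point at
  // FrameSizeAdjust + LocalOffset + InstrOffset, so the operand offset only
  // has to cancel the instruction's own encoded offset.
  int64_t BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
  assert(BaseOffset == 28);
  int64_t Resolved = -InstrOffset;  // operand offset after resolution
  // base target + operand offset + instruction offset reaches the address.
  assert(BaseOffset + Resolved + InstrOffset ==
         FrameSizeAdjust + LocalOffset + InstrOffset);
  return 0;
}
```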
+ TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; + } + } + } + } + return UsedBaseReg; +} diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index dfd4eaeca6603..ad1c537c1911a 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -36,7 +36,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {} + LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} const char *getPassName() const { return "Subregister lowering instruction pass"; @@ -58,9 +58,6 @@ namespace { void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo *TRI); - void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo *TRI, - bool AddIfNotFound = false); void TransferImplicitDefs(MachineInstr *MI); }; @@ -87,23 +84,6 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, } } -/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed, -/// and the lowered replacement instructions immediately precede it. -/// Mark the replacement instructions with the kill flag. -void -LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, - unsigned SrcReg, - const TargetRegisterInfo *TRI, - bool AddIfNotFound) { - for (MachineBasicBlock::iterator MII = - prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound)) - break; - assert(MII != MI->getParent()->begin() && - "copyPhysReg output doesn't reference source register!"); - } -} - /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered /// replacement instructions immediately precede it. Copy any implicit-def /// operands from MI to the replacement instruction. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index a27ee479433be..50f3f672dcedc 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -441,7 +441,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); - DEBUG(dbgs() << "PHIElimination splitting critical edge:" + DEBUG(dbgs() << "Splitting critical edge:" " BB#" << getNumber() << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); @@ -468,11 +468,33 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LV->addNewBlock(NMBB, this, Succ); if (MachineDominatorTree *MDT = - P->getAnalysisIfAvailable<MachineDominatorTree>()) - MDT->addNewBlock(NMBB, this); + P->getAnalysisIfAvailable<MachineDominatorTree>()) { + // Update dominator information. + MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ); + + bool IsNewIDom = true; + for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end(); + PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (PredBB == NMBB) + continue; + if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) { + IsNewIDom = false; + break; + } + } + + // We know "this" dominates the newly created basic block. + MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this); + + // If all the other predecessors of "Succ" are dominated by "Succ" itself + // then the new block is the new immediate dominator of "Succ". Otherwise, + // the new block doesn't dominate anything. 
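The dominator update in SplitCriticalEdge above hinges on one observation, which the code just below applies: the new block on the edge from "this" to Succ can become Succ's immediate dominator only if every other predecessor of Succ was already dominated by Succ, meaning only back edges remain. A small standalone model of that test (plain ints as block ids, an explicit dominance set; not the MachineDominatorTree API):

```cpp
#include <cassert>
#include <set>
#include <utility>
#include <vector>

// After splitting edge From -> Succ with a new block N, N is Succ's new
// immediate dominator exactly when every other predecessor of Succ is
// dominated by Succ itself; otherwise some path into Succ bypasses N and
// the new block dominates nothing.
bool newBlockIsIdom(const std::vector<int> &succPreds, int newBlock, int succ,
                    const std::set<std::pair<int, int>> &dominates) {
  for (int p : succPreds) {
    if (p == newBlock)
      continue;
    if (!dominates.count({succ, p}))
      return false;
  }
  return true;
}

int main() {
  // Blocks: 1 = From, 2 = Succ, 3 = a loop block dominated by Succ, 4 = N.
  std::set<std::pair<int, int>> dom = {{2, 3}};  // Succ dominates 3
  assert(newBlockIsIdom({4, 3}, 4, 2, dom));     // only a back edge remains
  assert(!newBlockIsIdom({4, 5}, 4, 2, dom));    // block 5 is another entry
  return 0;
}
```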
+ if (IsNewIDom) + MDT->changeImmediateDominator(SucccDTNode, NewDTNode); + } - if (MachineLoopInfo *MLI = - P->getAnalysisIfAvailable<MachineLoopInfo>()) + if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>()) if (MachineLoop *TIL = MLI->getLoopFor(this)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 833cc00027db5..92e2299ec62fa 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -41,7 +41,7 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {} + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -49,6 +49,7 @@ namespace { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); + AU.addPreservedID(MachineLoopInfoID); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); } @@ -85,8 +86,8 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; -static RegisterPass<MachineCSE> -X("machine-cse", "Machine Common Subexpression Elimination"); +INITIALIZE_PASS(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false); FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } @@ -107,29 +108,9 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineInstr *DefMI = MRI->getVRegDef(Reg); if (DefMI->getParent() != MBB) continue; - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - TargetRegisterInfo::isVirtualRegister(SrcReg) && - !SrcSubIdx && !DstSubIdx) { - const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); - if (!NewRC) - continue; - DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << *MI); - MO.setReg(SrcReg); - MRI->clearKillFlags(SrcReg); - if (NewRC != SRC) - MRI->setRegClass(SrcReg, NewRC); - DefMI->eraseFromParent(); - ++NumCoalesces; - Changed = true; - } - if (!DefMI->isCopy()) continue; - SrcReg = DefMI->getOperand(1).getReg(); + unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) @@ -261,19 +242,13 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, return false; } -static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - return MI->isCopyLike() || - TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); -} - bool MachineCSE::isCSECandidate(MachineInstr *MI) { if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) return false; // Ignore copies. - if (isCopy(MI, TII)) + if (MI->isCopyLike()) return false; // Ignore stuff that we obviously can't move. @@ -329,7 +304,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, E = MRI->use_nodbg_end(); I != E; ++I) { MachineInstr *Use = &*I; // Ignore copies. 
- if (!isCopy(Use, TII)) { + if (!Use->isCopyLike()) { HasNonCopyUse = true; break; } @@ -385,7 +360,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { // After coalescing MI itself may become a copy. - if (isCopy(MI, TII)) + if (MI->isCopyLike()) continue; FoundCSE = VNT.count(MI); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index b5f8fbba99deb..3c674789244a2 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -24,10 +24,10 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); char MachineDominatorTree::ID = 0; -static RegisterPass<MachineDominatorTree> -E("machinedomtree", "MachineDominator Tree Construction", true); +INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", + "MachineDominator Tree Construction", true, true); -const PassInfo *const llvm::MachineDominatorsID = &E; +char &llvm::MachineDominatorsID = MachineDominatorTree::ID; void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -41,7 +41,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { } MachineDominatorTree::MachineDominatorTree() - : MachineFunctionPass(&ID) { + : MachineFunctionPass(ID) { DT = new DominatorTreeBase<MachineBasicBlock>(false); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 666120f032c60..017170076cebb 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -397,7 +397,6 @@ void MachineFunction::viewCFGOnly() const /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, const TargetRegisterClass *RC) { - assert(RC->contains(PReg) && "Not the correct regclass!"); MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { @@ -447,7 +446,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/false)); + /*isSS*/false, false)); return -++NumFixedObjects; } diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 07a0f45c0f481..4f84b952e0612 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -20,14 +20,14 @@ using namespace llvm; // a default constructor. 
static PassInfo X("Machine Function Analysis", "machine-function-analysis", - intptr_t(&MachineFunctionAnalysis::ID), 0, + &MachineFunctionAnalysis::ID, 0, /*CFGOnly=*/false, /*is_analysis=*/true); char MachineFunctionAnalysis::ID = 0; MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, CodeGenOpt::Level OL) : - FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { + FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { } MachineFunctionAnalysis::~MachineFunctionAnalysis() { diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 547c4febc8dae..2aaa798a02c19 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -29,7 +29,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { const std::string Banner; MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) - : MachineFunctionPass(&ID), OS(os), Banner(banner) {} + : MachineFunctionPass(ID), OS(os), Banner(banner) {} const char *getPassName() const { return "MachineFunction Printer"; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 6b2e98549c718..446e461d5460f 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1236,12 +1236,18 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; + const MachineRegisterInfo *MRI = 0; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); if (!TM && MF) TM = &MF->getTarget(); + if (MF) + MRI = &MF->getRegInfo(); } + // Save a list of virtual registers. + SmallVector<unsigned, 8> VirtRegs; + // Print explicitly defined operands on the left of an assignment syntax. unsigned StartOp = 0, e = getNumOperands(); for (; StartOp < e && getOperand(StartOp).isReg() && @@ -1250,6 +1256,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { ++StartOp) { if (StartOp != 0) OS << ", "; getOperand(StartOp).print(OS, TM); + unsigned Reg = getOperand(StartOp).getReg(); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) + VirtRegs.push_back(Reg); } if (StartOp != 0) @@ -1264,6 +1273,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegs.push_back(MO.getReg()); + // Omit call-clobbered registers which aren't used anywhere. This makes // call instructions much less noisy on targets where calls clobber lots // of registers. Don't rely on MO.isDead() because we may be called before @@ -1325,11 +1338,29 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); i != e; ++i) { OS << **i; - if (next(i) != e) + if (llvm::next(i) != e) OS << " "; } } + // Print the regclass of any virtual registers encountered. 
+ if (MRI && !VirtRegs.empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + for (unsigned i = 0; i != VirtRegs.size(); ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); + OS << " " << RC->getName() << ":%reg" << VirtRegs[i]; + for (unsigned j = i+1; j != VirtRegs.size();) { + if (MRI->getRegClass(VirtRegs[j]) != RC) { + ++j; + continue; + } + if (VirtRegs[i] != VirtRegs[j]) + OS << "," << VirtRegs[j]; + VirtRegs.erase(VirtRegs.begin()+j); + } + } + } + if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 4c054f51f3a8a..1a74b747e9f2a 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -68,16 +68,16 @@ namespace { BitVector AllocatableSet; - // For each opcode, keep a list of potentail CSE instructions. + // For each opcode, keep a list of potential CSE instructions. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; public: static char ID; // Pass identification, replacement for typeid MachineLICM() : - MachineFunctionPass(&ID), PreRegAlloc(true) {} + MachineFunctionPass(ID), PreRegAlloc(true) {} explicit MachineLICM(bool PreRA) : - MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} + MachineFunctionPass(ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -189,8 +189,8 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; -static RegisterPass<MachineLICM> -X("machinelicm", "Machine Loop Invariant Code Motion"); +INITIALIZE_PASS(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false); FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { return new MachineLICM(PreRegAlloc); @@ -488,9 +488,14 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { MII = NextMII; } - const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); - for (unsigned I = 0, E = Children.size(); I != E; ++I) - HoistRegion(Children[I]); + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. + if (BB->succ_size() < 25) { + const std::vector<MachineDomTreeNode*> &Children = N->getChildren(); + for (unsigned I = 0, E = Children.size(); I != E; ++I) + HoistRegion(Children[I]); + } } /// IsLICMCandidate - Returns true if the instruction may be a suitable diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 269538b31d0b9..bca4b0c28985f 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -30,10 +30,10 @@ TEMPLATE_INSTANTIATION(MLIB); } char MachineLoopInfo::ID = 0; -static RegisterPass<MachineLoopInfo> -X("machine-loops", "Machine Natural Loop Construction", true); +INITIALIZE_PASS(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true); -const PassInfo *const llvm::MachineLoopInfoID = &X; +char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 15778b46fe0a3..b647a4dcc5308 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -28,8 +28,8 @@ using namespace llvm; using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use TargetData's. 
-static RegisterPass<MachineModuleInfo> -X("machinemoduleinfo", "Machine Module Information"); +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false); char MachineModuleInfo::ID = 0; // Out of line virtual method. @@ -254,7 +254,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) -: ImmutablePass(&ID), Context(MAI), +: ImmutablePass(ID), Context(MAI), ObjFileMMI(0), CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){ // Always emit some info, by default "no personality" info. @@ -264,7 +264,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) } MachineModuleInfo::MachineModuleInfo() -: ImmutablePass(&ID), Context(*(MCAsmInfo*)0) { +: ImmutablePass(ID), Context(*(MCAsmInfo*)0) { assert(0 && "This MachineModuleInfo constructor should never be called, MMI " "should always be explicitly constructed by LLVMTargetMachine"); abort(); @@ -579,10 +579,3 @@ namespace { } }; } - -MachineModuleInfo::VariableDbgInfoMapTy & -MachineModuleInfo::getVariableDbgInfo() { - std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(), - VariableDebugSorter()); - return VariableDbgInfo; -} diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 61334fc1790a1..c8f8fafe227e6 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -26,11 +26,21 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -STATISTIC(NumSunk, "Number of machine instructions sunk"); +static cl::opt<bool> +SplitEdges("machine-sink-split", + cl::desc("Split critical edges during machine sinking"), + cl::init(false), cl::Hidden); +static cl::opt<unsigned> +SplitLimit("split-limit", + cl::init(~0u), cl::Hidden); + +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); namespace { class MachineSinking : public MachineFunctionPass { @@ -44,7 +54,7 @@ namespace { public: static char ID; // Pass identification - MachineSinking() : MachineFunctionPass(&ID) {} + MachineSinking() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -59,21 +69,28 @@ namespace { } private: bool ProcessBlock(MachineBasicBlock &MBB); + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From, + MachineBasicBlock *To); bool SinkInstruction(MachineInstr *MI, bool &SawStore); - bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const; + bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, bool &LocalUse) const; }; } // end anonymous namespace char MachineSinking::ID = 0; -static RegisterPass<MachineSinking> -X("machine-sink", "Machine code sinking"); +INITIALIZE_PASS(MachineSinking, "machine-sink", + "Machine code sinking", false, false); FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } /// AllUsesDominatedByBlock - Return true if all uses of the specified register -/// occur in blocks dominated by the specified block. +/// occur in blocks dominated by the specified block. If any use is in the +/// definition block, then return false since it is never legal to move def +/// after uses. 
bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, - MachineBasicBlock *MBB) const { + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &LocalUse) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignoring debug uses is necessary so debug info doesn't affect the code. @@ -91,6 +108,9 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); + } else if (UseBlock == DefMBB) { + LocalUse = true; + return false; } // Check that it dominates. @@ -166,6 +186,66 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { return MadeChange; } +MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB) { + // Avoid breaking back edge. From == To means backedge for single BB loop. + if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB) + return 0; + + // Check for more "complex" loops. + if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) || + !LI->isLoopHeader(ToBB)) { + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. + // It's only legal to break critical edge and sink the computation to the + // new block if all the predecessors of "To", except for "From", are + // not dominated by "From". Given SSA property, this means these + // predecessors are dominated by "To". + for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(), + E = ToBB->pred_end(); PI != E; ++PI) { + if (*PI == FromBB) + continue; + if (!DT->dominates(ToBB, *PI)) + return 0; + } + + // FIXME: Determine if it's cost effective to break this edge. + return FromBB->SplitCriticalEdge(ToBB, this); + } + + return 0; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { @@ -246,7 +326,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse)) return false; continue; @@ -256,10 +337,14 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // we should sink to. for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), E = ParentBlock->succ_end(); SI != E; ++SI) { - if (AllUsesDominatedByBlock(Reg, *SI)) { + bool LocalUse = false; + if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) { SuccToSinkTo = *SI; break; } + if (LocalUse) + // Def is used locally, it's never safe to move this def. 
+ return false; } // If we couldn't find a block to sink to, ignore this instruction. @@ -303,27 +388,44 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. + bool TryBreak = false; bool store = true; if (!MI->isSafeToMove(TII, AA, store)) { - DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); - return false; + DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); + TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. - if (!DT->dominates(ParentBlock, SuccToSinkTo)) { - DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); - return false; + if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); + TryBreak = true; } // Don't sink instructions into a loop. - if (LI->isLoopHeader(SuccToSinkTo)) { - DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); - return false; + if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. - DEBUG(dbgs() << "Sinking along critical edge.\n"); + if (!TryBreak) + DEBUG(dbgs() << "Sinking along critical edge.\n"); + else { + MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo); + if (!NewSucc) { + DEBUG(dbgs() << + " *** PUNTING: Not legal or profitable to break critical edge\n"); + return false; + } else { + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << ParentBlock->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); + SuccToSinkTo = NewSucc; + ++NumSplit; + } + } } // Determine where to insert into. Skip phi nodes. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 2297c908b1e00..1e88562935eac 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -1,4 +1,4 @@ -//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===// +//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -44,19 +45,14 @@ using namespace llvm; namespace { struct MachineVerifier { - MachineVerifier(Pass *pass, bool allowDoubleDefs) : + MachineVerifier(Pass *pass) : PASS(pass), - allowVirtDoubleDefs(allowDoubleDefs), - allowPhysDoubleDefs(true), OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; - const bool allowVirtDoubleDefs; - const bool allowPhysDoubleDefs; - const char *const OutFileName; raw_ostream *OS; const MachineFunction *MF; @@ -91,10 +87,6 @@ namespace { // defined. Map value is the user. RegMap vregsLiveIn; - // Vregs that must be dead in because they are defined without being - // killed first. Map value is the defining instruction. - RegMap vregsDeadIn; - // Regs killed in MBB. They may be defined again, and will then be in both // regsKilled and regsLiveOut. 
RegSet regsKilled; @@ -175,6 +167,7 @@ namespace { // Analysis information if available LiveVariables *LiveVars; + const LiveIntervals *LiveInts; void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); @@ -195,15 +188,14 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); + void verifyLiveIntervals(); }; struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid - bool AllowDoubleDefs; - explicit MachineVerifierPass(bool allowDoubleDefs = false) - : MachineFunctionPass(&ID), - AllowDoubleDefs(allowDoubleDefs) {} + MachineVerifierPass() + : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -211,7 +203,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) { - MF.verify(this, AllowDoubleDefs); + MF.verify(this); return false; } }; @@ -219,17 +211,15 @@ namespace { } char MachineVerifierPass::ID = 0; -static RegisterPass<MachineVerifierPass> -MachineVer("machineverifier", "Verify generated machine code"); -static const PassInfo *const MachineVerifyID = &MachineVer; +INITIALIZE_PASS(MachineVerifierPass, "machineverifier", + "Verify generated machine code", false, false); -FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { - return new MachineVerifierPass(allowPhysDoubleDefs); +FunctionPass *llvm::createMachineVerifierPass() { + return new MachineVerifierPass(); } -void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const { - MachineVerifier(p, allowDoubleDefs) - .runOnMachineFunction(const_cast<MachineFunction&>(*this)); +void MachineFunction::verify(Pass *p) const { + MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this)); } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { @@ -255,10 +245,13 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); + LiveVars = NULL; + LiveInts = NULL; if (PASS) { - LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); - } else { - LiveVars = NULL; + LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); + // We don't want to verify LiveVariables if LiveIntervals is available. + if (!LiveInts) + LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); } visitMachineFunctionBefore(); @@ -512,6 +505,20 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if ((*I)->isStore() && !TI.mayStore()) report("Missing mayStore flag", MI); } + + // Debug values must not have a slot index. + // Other instructions must have one. + if (LiveInts) { + bool mapped = !LiveInts->isNotInMIMap(MI); + if (MI->isDebugValue()) { + if (mapped) + report("Debug instruction has a slot index", MI); + } else { + if (!mapped) + report("Missing slot index", MI); + } + } + } void @@ -570,15 +577,30 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } else isKill = MO->isKill(); - if (isKill) { + if (isKill) addRegWithSubRegs(regsKilled, Reg); - // Check that LiveVars knows this kill - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); + // Check that LiveVars knows this kill. 
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), + VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // TODO: Verify isKill == LI.killedAt. + } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + report("Virtual register has no Live interval", MO, MONum); } } @@ -607,6 +629,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { addRegWithSubRegs(regsDead, Reg); else addRegWithSubRegs(regsDefined, Reg); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) { + assert(LR->valno && "NULL valno is not allowed"); + if (LR->valno->def != DefIdx) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << LR->valno->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } } // Check register classes. @@ -670,40 +714,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); - set_subtract(regsLive, regsKilled); - regsKilled.clear(); - - // Verify that both <def> and <def,dead> operands refer to dead registers. - RegVector defs(regsDefined); - defs.append(regsDead.begin(), regsDead.end()); - - for (RegVector::const_iterator I = defs.begin(), E = defs.end(); - I != E; ++I) { - if (regsLive.count(*I)) { - if (TargetRegisterInfo::isPhysicalRegister(*I)) { - if (!allowPhysDoubleDefs && !isReserved(*I) && - !regsLiveInButUnused.count(*I)) { - report("Redefining a live physical register", MI); - *OS << "Register " << TRI->getName(*I) - << " was defined but already live.\n"; - } - } else { - if (!allowVirtDoubleDefs) { - report("Redefining a live virtual register", MI); - *OS << "Virtual register %reg" << *I - << " was defined but already live.\n"; - } - } - } else if (TargetRegisterInfo::isVirtualRegister(*I) && - !MInfo.regsKilled.count(*I)) { - // Virtual register defined without being killed first must be dead on - // entry. 
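// ---- [Editorial sketch; not part of the patch] -------------------------------
// The new LiveInts checks above lean on the SlotIndex convention: every mapped
// instruction gets an index, a reader must find its register live at the use
// slot, and a writer's live range must carry a value number defined exactly at
// the def slot. Condensed into one helper (assuming the 2.8-era LiveIntervals
// API used in the hunk; returns a diagnostic string, or null if consistent):
//
// static const char *checkOperandLiveness(const LiveIntervals *LIS,
//                                         const MachineInstr *MI,
//                                         unsigned Reg, bool IsDef) {
//   if (LIS->isNotInMIMap(MI) || !LIS->hasInterval(Reg))
//     return 0; // Nothing to check against.
//   const LiveInterval &LI = LIS->getInterval(Reg);
//   SlotIndex Idx = LIS->getInstructionIndex(MI);
//   if (!IsDef)
//     return LI.liveAt(Idx.getUseIndex()) ? 0 : "no live range at use";
//   const LiveRange *LR = LI.getLiveRangeContaining(Idx.getDefIndex());
//   if (!LR)
//     return "no live range at def";
//   return LR->valno->def == Idx.getDefIndex() ? 0 : "inconsistent valno->def";
// }
// ---- [End editorial sketch] ----------------------------------------------------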
- MInfo.vregsDeadIn.insert(std::make_pair(*I, MI)); - } - } - - set_subtract(regsLive, regsDead); regsDead.clear(); - set_union(regsLive, regsDefined); regsDefined.clear(); + set_subtract(regsLive, regsKilled); regsKilled.clear(); + set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); } void @@ -828,35 +841,15 @@ void MachineVerifier::visitMachineFunctionAfter() { continue; checkPHIOps(MFI); - - // Verify dead-in virtual registers. - if (!allowVirtDoubleDefs) { - for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(), - PrE = MFI->pred_end(); PrI != PrE; ++PrI) { - BBInfo &PrInfo = MBBInfoMap[*PrI]; - if (!PrInfo.reachable) - continue; - - for (RegMap::iterator I = MInfo.vregsDeadIn.begin(), - E = MInfo.vregsDeadIn.end(); I != E; ++I) { - // DeadIn register must be in neither regsLiveOut or vregsPassed of - // any predecessor. - if (PrInfo.isLiveOut(I->first)) { - report("Live-in virtual register redefined", I->second); - *OS << "Register %reg" << I->first - << " was live-out from predecessor MBB #" - << (*PrI)->getNumber() << ".\n"; - } - } - } - } } - // Now check LiveVariables info if available - if (LiveVars) { + // Now check liveness info if available + if (LiveVars || LiveInts) calcRegsRequired(); + if (LiveVars) verifyLiveVariables(); - } + if (LiveInts) + verifyLiveIntervals(); } void MachineVerifier::verifyLiveVariables() { @@ -886,4 +879,55 @@ void MachineVerifier::verifyLiveVariables() { } } +void MachineVerifier::verifyLiveIntervals() { + assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); + for (LiveIntervals::const_iterator LVI = LiveInts->begin(), + LVE = LiveInts->end(); LVI != LVE; ++LVI) { + const LiveInterval &LI = *LVI->second; + assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); + + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) { + VNInfo *VNI = *I; + const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def); + + if (!DefLR) { + if (!VNI->isUnused()) { + report("Valno not live at def and not marked unused", MF); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } + continue; + } + + if (VNI->isUnused()) + continue; + + if (DefLR->valno != VNI) { + report("Live range at def has different valno", MF); + DefLR->print(*OS); + *OS << " should use valno #" << VNI->id << " in " << LI << '\n'; + } + + } + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { + const LiveRange &LR = *I; + assert(LR.valno && "Live range has no valno"); + + if (LR.valno->id >= LI.getNumValNums() || + LR.valno != LI.getValNumInfo(LR.valno->id)) { + report("Foreign valno in live range", MF); + LR.print(*OS); + *OS << " has a valno not in " << LI << '\n'; + } + + if (LR.valno->isUnused()) { + report("Live range valno is marked unused", MF); + LR.print(*OS); + *OS << " in " << LI << '\n'; + } + + } + } +} diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp deleted file mode 100644 index dcdc243e5db34..0000000000000 --- a/lib/CodeGen/OptimizeExts.cpp +++ /dev/null @@ -1,220 +0,0 @@ -//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass performs optimization of sign / zero extension instructions. 
It -// may be extended to handle other instructions of similar property. -// -// On some targets, some instructions, e.g. X86 sign / zero extension, may -// leave the source value in the lower part of the result. This pass will -// replace (some) uses of the pre-extension value with uses of the sub-register -// of the results. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ext-opt" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, - cl::desc("Aggressive extension optimization")); - -STATISTIC(NumReuse, "Number of extension results reused"); - -namespace { - class OptimizeExts : public MachineFunctionPass { - const TargetMachine *TM; - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - MachineDominatorTree *DT; // Machine dominator tree - - public: - static char ID; // Pass identification - OptimizeExts() : MachineFunctionPass(&ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - if (Aggressive) { - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); - } - } - - private: - bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &LocalMIs); - }; -} - -char OptimizeExts::ID = 0; -static RegisterPass<OptimizeExts> -X("opt-exts", "Optimize sign / zero extensions"); - -FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); } - -/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads -/// a single register and writes a single register and it does not modify -/// the source, and if the source value is preserved as a sub-register of -/// the result, then replace all reachable uses of the source with the subreg -/// of the result. -/// Do not generate an EXTRACT that is used only in a debug use, as this -/// changes the code. Since this code does not currently share EXTRACTs, just -/// ignore all debug uses. -bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &LocalMIs) { - bool Changed = false; - LocalMIs.insert(MI); - - unsigned SrcReg, DstReg, SubIdx; - if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) { - if (TargetRegisterInfo::isPhysicalRegister(DstReg) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) - return false; - - MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); - if (++UI == MRI->use_nodbg_end()) - // No other uses. - return false; - - // Ok, the source has other uses. See if we can replace the other uses - // with use of the result of the extension. - SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - ReachedBBs.insert(UI->getParent()); - - bool ExtendLife = true; - // Uses that are in the same BB of uses of the result of the instruction. - SmallVector<MachineOperand*, 8> Uses; - // Uses that the result of the instruction can reach. 
- SmallVector<MachineOperand*, 8> ExtendedUses; - - UI = MRI->use_nodbg_begin(SrcReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); - UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = &*UI; - if (UseMI == MI) - continue; - if (UseMI->isPHI()) { - ExtendLife = false; - continue; - } - - // It's an error to translate this: - // - // %reg1025 = <sext> %reg1024 - // ... - // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 - // - // into this: - // - // %reg1025 = <sext> %reg1024 - // ... - // %reg1027 = COPY %reg1025:4 - // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 - // - // The problem here is that SUBREG_TO_REG is there to assert that an - // implicit zext occurs. It doesn't insert a zext instruction. If we allow - // the COPY here, it will give us the value after the <sext>, - // not the original value of %reg1024 before <sext>. - if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) - continue; - - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - // Local uses that come after the extension. - if (!LocalMIs.count(UseMI)) - Uses.push_back(&UseMO); - } else if (ReachedBBs.count(UseMBB)) - // Non-local uses where the result of extension is used. Always - // replace these unless it's a PHI. - Uses.push_back(&UseMO); - else if (Aggressive && DT->dominates(MBB, UseMBB)) - // We may want to extend live range of the extension result in order - // to replace these uses. - ExtendedUses.push_back(&UseMO); - else { - // Both will be live out of the def MBB anyway. Don't extend live - // range of the extension result. - ExtendLife = false; - break; - } - } - - if (ExtendLife && !ExtendedUses.empty()) - // Ok, we'll extend the liveness of the extension result. - std::copy(ExtendedUses.begin(), ExtendedUses.end(), - std::back_inserter(Uses)); - - // Now replace all uses. - if (!Uses.empty()) { - SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; - // Look for PHI uses of the extended result, we don't want to extend the - // liveness of a PHI input. It breaks all kinds of assumptions down - // stream. A PHI use is expected to be the kill of its source values. - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); - UI != UE; ++UI) - if (UI->isPHI()) - PHIBBs.insert(UI->getParent()); - - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - for (unsigned i = 0, e = Uses.size(); i != e; ++i) { - MachineOperand *UseMO = Uses[i]; - MachineInstr *UseMI = UseMO->getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (PHIBBs.count(UseMBB)) - continue; - unsigned NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) - .addReg(DstReg, 0, SubIdx); - UseMO->setReg(NewVR); - ++NumReuse; - Changed = true; - } - } - } - - return Changed; -} - -bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) { - TM = &MF.getTarget(); - TII = TM->getInstrInfo(); - MRI = &MF.getRegInfo(); - DT = Aggressive ? 
&getAnalysis<MachineDominatorTree>() : 0; - - bool Changed = false; - - SmallPtrSet<MachineInstr*, 8> LocalMIs; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - LocalMIs.clear(); - for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME; - ++MII) { - MachineInstr *MI = &*MII; - Changed |= OptimizeInstr(MI, MBB, LocalMIs); - } - } - - return Changed; -} diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 1613fe21e42dd..edb4eea71b8a7 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -33,7 +33,7 @@ namespace { public: static char ID; // Pass identification - OptimizePHIs() : MachineFunctionPass(&ID) {} + OptimizePHIs() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -54,8 +54,8 @@ namespace { } char OptimizePHIs::ID = 0; -static RegisterPass<OptimizePHIs> -X("opt-phis", "Optimize machine instruction PHIs"); +INITIALIZE_PASS(OptimizePHIs, "opt-phis", + "Optimize machine instruction PHIs", false, false); FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } @@ -101,16 +101,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); // Skip over register-to-register moves. - unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; - if (SrcMI && - TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && - SrcSubIdx == 0 && DstSubIdx == 0 && - TargetRegisterInfo::isVirtualRegister(MvSrcReg)) - SrcMI = MRI->getVRegDef(MvSrcReg); - else if (SrcMI && SrcMI->isCopy() && - !SrcMI->getOperand(0).getSubReg() && - !SrcMI->getOperand(1).getSubReg() && - TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); if (!SrcMI) return false; diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h index 3bb24e1cc370c..791c227f0d07e 100644 --- a/lib/CodeGen/PBQP/HeuristicBase.h +++ b/lib/CodeGen/PBQP/HeuristicBase.h @@ -173,9 +173,13 @@ namespace PBQP { bool finished = false; while (!finished) { - if (!optimalReduce()) - if (!impl().heuristicReduce()) + if (!optimalReduce()) { + if (impl().heuristicReduce()) { + getSolver().recordRN(); + } else { finished = true; + } + } } } diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index 02938df007004..35514f9674785 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -226,6 +226,8 @@ namespace PBQP { // Nothing to do. Just push the node onto the reduction stack. pushToStack(nItr); + + s.recordR0(); } /// \brief Apply rule R1. @@ -274,6 +276,7 @@ namespace PBQP { assert(nd.getSolverDegree() == 0 && "Degree 1 with edge removed should be 0."); pushToStack(xnItr); + s.recordR1(); } /// \brief Apply rule R2. @@ -378,8 +381,14 @@ namespace PBQP { removeSolverEdge(zxeItr); pushToStack(xnItr); + s.recordR2(); } + /// \brief Record an application of the RN rule. + /// + /// For use by the HeuristicBase. 
+ void recordRN() { s.recordRN(); } + private: NodeData& getSolverNodeData(Graph::NodeItr nItr) { diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index 4c1ce119ed050..18eaf7c0da9b3 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -52,9 +52,7 @@ namespace PBQP { bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) return true; - if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr)) - return false; - return (&*n1Itr < &*n2Itr); + return false; } private: HeuristicSolverImpl<Briggs> *s; @@ -69,9 +67,7 @@ namespace PBQP { cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); if (cost1 < cost2) return true; - if (cost1 > cost2) - return false; - return (&*n1Itr < &*n2Itr); + return false; } private: diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h index 294b5370afdfe..047fd04c7cb8f 100644 --- a/lib/CodeGen/PBQP/Solution.h +++ b/lib/CodeGen/PBQP/Solution.h @@ -26,15 +26,46 @@ namespace PBQP { /// To get the selection for each node in the problem use the getSelection method. class Solution { private: + typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap; SelectionsMap selections; + unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions; + public: /// \brief Number of nodes for which selections have been made. /// @return Number of nodes for which selections have been made. unsigned numNodes() const { return selections.size(); } + /// \brief Records a reduction via the R0 rule. Should be called from the + /// solver only. + void recordR0() { ++r0Reductions; } + + /// \brief Returns the number of R0 reductions applied to solve the problem. + unsigned numR0Reductions() const { return r0Reductions; } + + /// \brief Records a reduction via the R1 rule. Should be called from the + /// solver only. + void recordR1() { ++r1Reductions; } + + /// \brief Returns the number of R1 reductions applied to solve the problem. + unsigned numR1Reductions() const { return r1Reductions; } + + /// \brief Records a reduction via the R2 rule. Should be called from the + /// solver only. + void recordR2() { ++r2Reductions; } + + /// \brief Returns the number of R2 reductions applied to solve the problem. + unsigned numR2Reductions() const { return r2Reductions; } + + /// \brief Records a reduction via the RN rule. Should be called from the + /// solver only. + void recordRN() { ++ rNReductions; } + + /// \brief Returns the number of RN reductions applied to solve the problem. + unsigned numRNReductions() const { return rNReductions; } + /// \brief Set the selection for a given node. /// @param nItr Node iterator. /// @param selection Selection for nItr. 
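One caveat in the Solution.h hunk above: the four new reduction counters are declared without an initializer, and recordR0/recordR1/recordR2/recordRN increment them, so the first solve would read indeterminate values. A default constructor that zeroes them is the obvious fix; a minimal sketch (editorial, not something this patch contains):

  class Solution {
    // ... selections map and counters as in the hunk ...
    unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
  public:
    Solution()
      : r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
    // recordR0() and friends can now safely do ++r0Reductions etc.
  };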
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index ea6b094d7efe4..d4df4c548711e 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Function.h" @@ -37,16 +38,15 @@ STATISTIC(NumAtomic, "Number of atomic phis lowered"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; -static RegisterPass<PHIElimination> -X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); +INITIALIZE_PASS(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false); -const PassInfo *const llvm::PHIEliminationID = &X; +char &llvm::PHIEliminationID = PHIElimination::ID; void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); - // rdar://7401784 This would be nice: - // AU.addPreservedID(MachineLoopInfoID); + AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -56,9 +56,11 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; // Split critical edges to help the coalescer - if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) + if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV); + Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + } // Populate VRegPHIUseCount analyzePHINodes(MF); @@ -179,6 +181,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; unsigned DestReg = MPhi->getOperand(0).getReg(); + assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); bool isDead = MPhi->getOperand(0).isDead(); // Create a new register for the incoming PHI arguments. @@ -265,6 +268,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); @@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Insert the copy. if (!reusedIncoming && IncomingReg) BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg); + TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -378,10 +383,12 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV) { + LiveVariables &LV, + MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. 
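// ---- [Editorial sketch; not part of the patch] --------------------------------
// The splitting condition added just below reads as one standalone predicate:
// split PreMBB -> MBB for register Reg only if the split can shorten a live
// range, and never on a loop backedge. A restatement, assuming the
// LiveVariables / MachineLoopInfo queries used in the hunk:
//
// static bool shouldSplitPHIEdge(unsigned Reg, MachineBasicBlock *PreMBB,
//                                MachineBasicBlock &MBB, LiveVariables &LV,
//                                MachineLoopInfo *MLI) {
//   if (PreMBB == &MBB)
//     return false;  // Single-block loop; the edge is a backedge.
//   // If Reg is live in to MBB anyway, splitting gains nothing; if it isn't
//   // live out of PreMBB, there is nothing to shorten.
//   if (LV.isLiveIn(Reg, MBB) || !LV.isLiveOut(Reg, *PreMBB))
//     return false;
//   // Never split a loop backedge: the new block would sit out of line
//   // inside the loop, which is bad for code placement.
//   if (MLI && MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
//       MLI->isLoopHeader(&MBB))
//     return false;
//   return true;
// }
// ---- [End editorial sketch] ------------------------------------------------------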
+  bool Changed = false;
   for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
        BBI != BBE && BBI->isPHI(); ++BBI) {
     for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
@@ -390,8 +397,15 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
       // We break edges when registers are live out from the predecessor block
       // (not considering PHI nodes). If the register is live in to this block
       // anyway, we would gain nothing from splitting.
-      if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
-        PreMBB->SplitCriticalEdge(&MBB, this);
+      // Avoid splitting backedges of loops. It would introduce small
+      // out-of-line blocks into the loop, which is very bad for code
+      // placement.
+      if (PreMBB != &MBB &&
+          !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+        if (!MLI ||
+            !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+              MLI->isLoopHeader(&MBB)))
+          Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+      }
     }
   }
-  return true;
+  return Changed;
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 7dedf0318a8a6..45a97182e71c5 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -13,19 +13,21 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"

 namespace llvm {
   class LiveVariables;
+  class MachineRegisterInfo;
+  class MachineLoopInfo;

   /// Lower PHI instructions to copies.
   class PHIElimination : public MachineFunctionPass {
-    MachineRegisterInfo *MRI; // Machine register information
+    MachineRegisterInfo *MRI; // Machine register information

   public:
     static char ID; // Pass identification, replacement for typeid
-    PHIElimination() : MachineFunctionPass(&ID) {}
+    PHIElimination() : MachineFunctionPass(ID) {}

     virtual bool runOnMachineFunction(MachineFunction &Fn);

@@ -49,7 +51,7 @@ namespace llvm {
     /// Split critical edges where necessary for good coalescer performance.
     bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
-                       LiveVariables &LV);
+                       LiveVariables &LV, MachineLoopInfo *MLI);

     /// SplitCriticalEdge - Split a critical edge from A to B by
     /// inserting a new MBB. Update branches in A and PHI instructions
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 0000000000000..17cee46ca16c6
--- /dev/null
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,287 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+//     Optimization of sign / zero extension instructions. It may be extended
+//     to handle other instructions with similar properties.
+//
+//     On some targets, some instructions, e.g. X86 sign / zero extension, may
+//     leave the source value in the lower part of the result. This
+//     optimization will replace some uses of the pre-extension value with
+//     uses of the sub-register of the result.
+//
+// - Optimize Comparisons
+//
+//     Optimization of comparison instructions. For instance, in this code:
+//
+//       sub r1, 1
+//       cmp r1, 0
+//       bz  L1
+//
+// If the "sub" instruction already sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+//===----------------------------------------------------------------------===//

+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+           cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse,      "Number of extension results reused");
+STATISTIC(NumEliminated, "Number of compares eliminated");
+
+namespace {
+  class PeepholeOptimizer : public MachineFunctionPass {
+    const TargetMachine   *TM;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo   *MRI;
+    MachineDominatorTree  *DT;  // Machine dominator tree
+
+  public:
+    static char ID; // Pass identification
+    PeepholeOptimizer() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      if (Aggressive) {
+        AU.addRequired<MachineDominatorTree>();
+        AU.addPreserved<MachineDominatorTree>();
+      }
+    }
+
+  private:
+    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+  };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
+                "Peephole Optimizations", false, false);
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+  return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
+/// reads a single register and writes a single register and it does not
+/// modify the source, and if the source value is preserved as a sub-register
+/// of the result, then replace all reachable uses of the source with the
+/// subreg of the result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this
+/// changes the code. Since this code does not currently share EXTRACTs, just
+/// ignore all debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+  LocalMIs.insert(MI);
+
+  unsigned SrcReg, DstReg, SubIdx;
+  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+    return false;
+
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg))
+    return false;
+
+  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+  if (++UI == MRI->use_nodbg_end())
+    // No other uses.
+    return false;
+
+  // The source has other uses. See if we can replace the other uses with uses
+  // of the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; + UI = MRI->use_nodbg_begin(DstReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) + ReachedBBs.insert(UI->getParent()); + + // Uses that are in the same BB of uses of the result of the instruction. + SmallVector<MachineOperand*, 8> Uses; + + // Uses that the result of the instruction can reach. + SmallVector<MachineOperand*, 8> ExtendedUses; + + bool ExtendLife = true; + UI = MRI->use_nodbg_begin(SrcReg); + for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + if (UseMI == MI) + continue; + + if (UseMI->isPHI()) { + ExtendLife = false; + continue; + } + + // It's an error to translate this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 + // + // into this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1027 = COPY %reg1025:4 + // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 + // + // The problem here is that SUBREG_TO_REG is there to assert that an + // implicit zext occurs. It doesn't insert a zext instruction. If we allow + // the COPY here, it will give us the value after the <sext>, not the + // original value of %reg1024 before <sext>. + if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) + continue; + + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (UseMBB == MBB) { + // Local uses that come after the extension. + if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); + } else if (ReachedBBs.count(UseMBB)) { + // Non-local uses where the result of the extension is used. Always + // replace these unless it's a PHI. + Uses.push_back(&UseMO); + } else if (Aggressive && DT->dominates(MBB, UseMBB)) { + // We may want to extend the live range of the extension result in order + // to replace these uses. + ExtendedUses.push_back(&UseMO); + } else { + // Both will be live out of the def MBB anyway. Don't extend live range of + // the extension result. + ExtendLife = false; + break; + } + } + + if (ExtendLife && !ExtendedUses.empty()) + // Extend the liveness of the extension result. + std::copy(ExtendedUses.begin(), ExtendedUses.end(), + std::back_inserter(Uses)); + + // Now replace all uses. + bool Changed = false; + if (!Uses.empty()) { + SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; + + // Look for PHI uses of the extended result, we don't want to extend the + // liveness of a PHI input. It breaks all kinds of assumptions down + // stream. A PHI use is expected to be the kill of its source values. 
+    UI = MRI->use_nodbg_begin(DstReg);
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+      if (UI->isPHI())
+        PHIBBs.insert(UI->getParent());
+
+    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+      MachineOperand *UseMO = Uses[i];
+      MachineInstr *UseMI = UseMO->getParent();
+      MachineBasicBlock *UseMBB = UseMI->getParent();
+      if (PHIBBs.count(UseMBB))
+        continue;
+
+      unsigned NewVR = MRI->createVirtualRegister(RC);
+      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), NewVR)
+        .addReg(DstReg, 0, SubIdx);
+
+      UseMO->setReg(NewVR);
+      ++NumReuse;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and
+/// use the flag from the previous instruction.
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+                                         MachineBasicBlock *MBB) {
+  // If this instruction is a comparison against zero and isn't comparing a
+  // physical register, we can try to optimize it.
+  unsigned SrcReg;
+  int CmpValue;
+  if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
+    return false;
+
+  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+  if (llvm::next(DI) != MRI->def_end())
+    // Only support one definition.
+    return false;
+
+  // Attempt to convert the defining instruction to set the "zero" flag.
+  if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+    ++NumEliminated;
+    return true;
+  }
+
+  return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  TM  = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+  bool Changed = false;
+
+  SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    LocalMIs.clear();
+
+    for (MachineBasicBlock::iterator
+           MII = I->begin(), ME = I->end(); MII != ME; ) {
+      MachineInstr *MI = &*MII;
+
+      if (MI->getDesc().isCompare() &&
+          !MI->getDesc().hasUnmodeledSideEffects()) {
+        ++MII; // The iterator may become invalid if the compare is deleted.
+        Changed |= OptimizeCmpInstr(MI, MBB);
+      } else {
+        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+        ++MII;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 4af8e07f34800..f0bd6d1372be7 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -85,7 +85,7 @@ namespace {
   public:
     static char ID;
     PostRAScheduler(CodeGenOpt::Level ol) :
-      MachineFunctionPass(&ID), OptLevel(ol) {}
+      MachineFunctionPass(ID), OptLevel(ol) {}

     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
@@ -130,7 +130,7 @@ namespace {

     /// KillIndices - The index of the most recent kill (proceeding bottom-up),
     /// or ~0u if the register is not live.
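// ---- [Editorial sketch; not part of the patch] ----------------------------------
// What OptimizeCmpInstr (in the PeepholeOptimizer hunk above) buys, shown on
// the motivating pattern from that file's header. The mnemonics below are
// illustrative only, not from the patch: the target hook ConvertToSetZeroFlag
// rewrites the single def of the compared register into a flag-setting form,
// after which the compare is dead.
//
//   before:   %r1 = SUB  %r1, 1        after:   %r1 = SUBS %r1, 1   ; sets Z
//             CMP  %r1, 0                       ; compare eliminated
//             Bcc  eq, L1                       Bcc  eq, L1
//
// The single-def test (def_begin/def_end) is what makes this sound: with
// multiple defs, a different definition of %r1 could reach the compare and
// the folded flag would be stale.
// ---- [End editorial sketch] --------------------------------------------------------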
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<unsigned> KillIndices; public: SchedulePostRATDList(MachineFunction &MF, @@ -140,7 +140,8 @@ namespace { AntiDepBreaker *ADB, AliasAnalysis *aa) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa) {} + HazardRec(HR), AntiDepBreak(ADB), AA(aa), + KillIndices(TRI->getNumRegs()) {} ~SchedulePostRATDList() { } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index fb2f90935551c..cd9d83eeb6846 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -92,7 +92,7 @@ namespace { public: static char ID; PreAllocSplitting() - : MachineFunctionPass(&ID) {} + : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -203,10 +203,11 @@ namespace { char PreAllocSplitting::ID = 0; -static RegisterPass<PreAllocSplitting> -X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting"); +INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting", + "Pre-Register Allocation Live Interval Splitting", + false, false); -const PassInfo *const llvm::PreAllocSplittingID = &X; +char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; /// findSpillPoint - Find a gap as far away from the given MI that's suitable /// for spilling the current live interval. The index must be before any @@ -676,11 +677,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (DstReg == LI->reg) - NewVN->setCopy(&*DI); - } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) + if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) NewVN->setCopy(&*DI); NewVNs[&*DI] = NewVN; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 2e31908f9fe2f..b8831db1d118a 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,8 +26,8 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; -static RegisterPass<ProcessImplicitDefs> X("processimpdefs", - "Process Implicit Definitions."); +INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions.", false, false); void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -46,12 +46,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, unsigned OpIdx, const TargetInstrInfo *tii_, SmallSet<unsigned, 8> &ImpDefRegs) { - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && - (DstSubReg == 0 || ImpDefRegs.count(DstReg))) - return true; - switch(OpIdx) { case 1: return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || @@ -75,14 +69,6 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg, return true; return false; } - - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { - if (Reg != SrcReg) - return false; - if (DstSubReg == 0 || ImpDefRegs.count(DstReg)) - return true; - } return false; } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 3843b2537051c..e2802c1fdf4a7 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ 
b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -19,6 +19,7 @@
 //
 //===----------------------------------------------------------------------===//

+#define DEBUG_TYPE "pei"
 #include "PrologEpilogInserter.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -32,7 +33,10 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include <climits>

@@ -40,8 +44,11 @@ using namespace llvm;

 char PEI::ID = 0;

-static RegisterPass<PEI>
-X("prologepilog", "Prologue/Epilogue Insertion");
+INITIALIZE_PASS(PEI, "prologepilog",
+                "Prologue/Epilogue Insertion", false, false);
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");

 /// createPrologEpilogCodeInserter - This function returns a pass that inserts
 /// prolog and epilog code, and eliminates abstract frame references.
@@ -56,7 +63,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
   RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
   FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
-  FrameConstantRegMap.clear();

   // Calculate the MaxCallFrameSize and AdjustsStack variables for the
   // function's frame information. Also eliminates call frame pseudo
@@ -72,10 +78,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
   calculateCalleeSavedRegisters(Fn);

   // Determine placement of CSR spill/restore code:
-  // - with shrink wrapping, place spills and restores to tightly
+  // - With shrink wrapping, place spills and restores to tightly
   //   enclose regions in the Machine CFG of the function where
-  //   they are used. Without shrink wrapping
-  // - default (no shrink wrapping), place all spills in the
+  //   they are used.
+  // - Without shrink wrapping (default), place all spills in the
   //   entry block, all restores in return blocks.
   placeCSRSpillsAndRestores(Fn);

@@ -461,8 +467,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
   Offset = (Offset + Align - 1) / Align * Align;

   if (StackGrowsDown) {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
     MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
   } else {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
     MFI->setObjectOffset(FrameIdx, Offset);
     Offset += MFI->getObjectSize(FrameIdx);
   }
@@ -547,15 +555,66 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
       AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
   }

+  // FIXME: Once this is working, the enable flag will change to a target
+  // check for whether the frame is large enough to want to use virtual
+  // frame index registers. Functions which don't want/need this optimization
+  // will continue to use the existing code path.
+  if (MFI->getUseLocalStackAllocationBlock()) {
+    unsigned Align = MFI->getLocalFrameMaxAlign();
+
+    // Adjust to alignment boundary.
+    Offset = (Offset + Align - 1) / Align * Align;
+
+    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+    // Resolve offsets for objects in the local block.
+    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+      int64_t FIOffset = (StackGrowsDown ?
-Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + // Make sure that the stack protector comes before the local variables on the // stack. - if (MFI->getStackProtectorIndex() >= 0) + SmallSet<int, 16> LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + LargeStackObjs.insert(i); + } + } + // Then assign frame offsets to stack objects that are not used to spill // callee saved registers. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; if (RS && (int)i == RS->getScavengingFrameIndex()) @@ -564,6 +623,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; + if (LargeStackObjs.count(i)) + continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); } @@ -694,16 +755,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - TargetRegisterInfo::FrameIndexValue Value; - unsigned VReg = - TRI.eliminateFrameIndex(MI, SPAdj, &Value, + TRI.eliminateFrameIndex(MI, SPAdj, FrameIndexVirtualScavenging ? NULL : RS); - if (VReg) { - assert (FrameIndexVirtualScavenging && - "Not scavenging, but virtual returned from " - "eliminateFrameIndex()!"); - FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); - } // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -731,38 +784,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { } } -/// findLastUseReg - find the killing use of the specified register within -/// the instruciton range. Return the operand number of the kill in Operand. -static MachineBasicBlock::iterator -findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, - unsigned Reg) { - // Scan forward to find the last use of this virtual register - for (++I; I != ME; ++I) { - MachineInstr *MI = I; - bool isDefInsn = false; - bool isKillInsn = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).isReg()) { - unsigned OpReg = MI->getOperand(i).getReg(); - if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) - continue; - assert (OpReg == Reg - && "overlapping use of scavenged index register!"); - // If this is the killing use, we have a candidate. 
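// ---- [Editorial sketch; not part of the patch] -----------------------------------
// The offset arithmetic used by AdjustStackOffset and the new local-block code
// above: round the running Offset up to the object's alignment, then negate it
// when the stack grows down. A tiny self-contained illustration:
//
// #include <cassert>
// #include <cstdint>
// static int64_t alignTo(int64_t Offset, unsigned Align) {
//   return (Offset + Align - 1) / Align * Align;
// }
// int main() {
//   // 13 bytes already allocated; the next object wants 8-byte alignment.
//   assert(alignTo(13, 8) == 16);
//   // On a downward-growing stack the object therefore lands at SP[-16],
//   // matching the "alloc FI(n) at SP[-Offset]" debug output above.
//   bool StackGrowsDown = true;
//   int64_t FIOffset = StackGrowsDown ? -alignTo(13, 8) : alignTo(13, 8);
//   assert(FIOffset == -16);
//   return 0;
// }
// ---- [End editorial sketch] ---------------------------------------------------------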
- if (MI->getOperand(i).isKill()) - isKillInsn = true; - else if (MI->getOperand(i).isDef()) - isDefInsn = true; - } - if (isKillInsn && !isDefInsn) - return I; - } - // If we hit the end of the basic block, there was no kill of - // the virtual register, which is wrong. - assert (0 && "scavenged index register never killed!"); - return ME; -} - /// scavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. @@ -772,27 +793,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); - // FIXME: The logic flow in this function is still too convoluted. - // It needs a cleanup refactoring. Do that in preparation for tracking - // more than one scratch register value and using ranges to find - // available scratch registers. - unsigned CurrentVirtReg = 0; - unsigned CurrentScratchReg = 0; - bool havePrevValue = false; - TargetRegisterInfo::FrameIndexValue PrevValue(0,0); - TargetRegisterInfo::FrameIndexValue Value(0,0); - MachineInstr *PrevLastUseMI = NULL; - unsigned PrevLastUseOp = 0; - bool trackingCurrentValue = false; + unsigned VirtReg = 0; + unsigned ScratchReg = 0; int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - bool isDefInsn = false; - bool isKillInsn = false; - bool clobbersScratchReg = false; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { @@ -800,121 +808,30 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // If we have a previous scratch reg, check and see if anything - // here kills whatever value is in there. - if (Reg == CurrentScratchReg) { - if (MO.isUse()) { - // Two-address operands implicitly kill - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) - clobbersScratchReg = true; - } else { - assert (MO.isDef()); - clobbersScratchReg = true; - } - } + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - } - // If this is a def, remember that this insn defines the value. - // This lets us properly consider insns which re-use the scratch - // register, such as r2 = sub r2, #imm, in the middle of the - // scratch range. - if (MO.isDef()) - isDefInsn = true; + + ++NumVirtualFrameRegs; // Have we already allocated a scratch register for this virtual? - if (Reg != CurrentVirtReg) { + if (Reg != VirtReg) { // When we first encounter a new virtual register, it // must be a definition. assert(MI->getOperand(i).isDef() && "frame index virtual missing def!"); - // We can't have nested virtual register live ranges because - // there's only a guarantee of one scavenged register at a time. - assert (CurrentVirtReg == 0 && - "overlapping frame index virtual registers!"); - - // If the target gave us information about what's in the register, - // we can use that to re-use scratch regs. 
- DenseMap<unsigned, FrameConstantEntry>::iterator Entry = - FrameConstantRegMap.find(Reg); - trackingCurrentValue = Entry != FrameConstantRegMap.end(); - if (trackingCurrentValue) { - SPAdj = (*Entry).second.second; - Value = (*Entry).second.first; - } else { - SPAdj = 0; - Value.first = 0; - Value.second = 0; - } - - // If the scratch register from the last allocation is still - // available, see if the value matches. If it does, just re-use it. - if (trackingCurrentValue && havePrevValue && PrevValue == Value) { - // FIXME: This assumes that the instructions in the live range - // for the virtual register are exclusively for the purpose - // of populating the value in the register. That's reasonable - // for these frame index registers, but it's still a very, very - // strong assumption. rdar://7322732. Better would be to - // explicitly check each instruction in the range for references - // to the virtual register. Only delete those insns that - // touch the virtual register. - - // Find the last use of the new virtual register. Remove all - // instruction between here and there, and update the current - // instruction to reference the last use insn instead. - MachineBasicBlock::iterator LastUseMI = - findLastUseReg(I, BB->end(), Reg); - - // Remove all instructions up 'til the last use, since they're - // just calculating the value we already have. - BB->erase(I, LastUseMI); - I = LastUseMI; - - // Extend the live range of the scratch register - PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); - RS->setUsed(CurrentScratchReg); - CurrentVirtReg = Reg; - - // We deleted the instruction we were scanning the operands of. - // Jump back to the instruction iterator loop. Don't increment - // past this instruction since we updated the iterator already. - DoIncr = false; - break; - } - // Scavenge a new scratch register - CurrentVirtReg = Reg; + VirtReg = Reg; const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - PrevValue = Value; + ScratchReg = RS->scavengeRegister(RC, I, SPAdj); + ++NumScavengedRegs; } // replace this reference to the virtual register with the // scratch register. - assert (CurrentScratchReg && "Missing scratch register!"); - MI->getOperand(i).setReg(CurrentScratchReg); + assert (ScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(ScratchReg); - if (MI->getOperand(i).isKill()) { - isKillInsn = true; - PrevLastUseOp = i; - PrevLastUseMI = MI; - } } } - // If this is the last use of the scratch, stop tracking it. The - // last use will be a kill operand in an instruction that does - // not also define the scratch register. - if (isKillInsn && !isDefInsn) { - CurrentVirtReg = 0; - havePrevValue = trackingCurrentValue; - } - // Similarly, notice if instruction clobbered the value in the - // register we're tracking for possible later reuse. This is noted - // above, but enforced here since the value is still live while we - // process the rest of the operands of the instruction. 
- if (clobbersScratchReg) { - havePrevValue = false; - CurrentScratchReg = 0; - } if (DoIncr) { RS->forward(I); ++I; diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index aa95773596cfe..d575124a6b3e6 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -36,7 +36,7 @@ namespace llvm { class PEI : public MachineFunctionPass { public: static char ID; - PEI() : MachineFunctionPass(&ID) {} + PEI() : MachineFunctionPass(ID) {} const char *getPassName() const { return "Prolog/Epilog Insertion & Frame Finalization"; @@ -99,13 +99,6 @@ namespace llvm { // TRI->requiresFrameIndexScavenging() for the curren function. bool FrameIndexVirtualScavenging; - // When using the scavenger post-pass to resolve frame reference - // materialization registers, maintain a map of the registers to - // the constant value and SP adjustment associated with it. - typedef std::pair<TargetRegisterInfo::FrameIndexValue, int> - FrameConstantEntry; - DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap; - #ifndef NDEBUG // Machine function handle. MachineFunction* MF; diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index f44478e5dd0bf..fc150d55e2265 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -16,6 +16,7 @@ #include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -46,7 +47,7 @@ namespace { class RAFast : public MachineFunctionPass { public: static char ID; - RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1), + RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), isBulkSpilling(false) {} private: const TargetMachine *TM; @@ -80,6 +81,8 @@ namespace { // that is currently available in a physical register. LiveRegMap LiveVirtRegs; + DenseMap<unsigned, MachineInstr *> LiveDbgValueMap; + // RegState - Track the state of a physical register. enum RegState { // A disabled register is not available for allocation, but an alias may @@ -110,9 +113,9 @@ namespace { // Allocatable - vector of allocatable physical registers. BitVector Allocatable; - // SkippedInstrs - Descriptors of instructions whose clobber list was ignored - // because all registers were spilled. It is still necessary to mark all the - // clobbered registers as used by the function. + // SkippedInstrs - Descriptors of instructions whose clobber list was + // ignored because all registers were spilled. It is still necessary to + // mark all the clobbered registers as used by the function. SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs; // isBulkSpilling - This flag is set when LiveRegMap will be cleared @@ -236,8 +239,7 @@ void RAFast::killVirtReg(unsigned VirtReg) { } /// spillVirtReg - This method spills the value specified by VirtReg into the -/// corresponding stack slot if needed. If isKill is set, the register is also -/// killed. +/// corresponding stack slot if needed. 
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); @@ -265,6 +267,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics + // If this register is used by DBG_VALUE then insert new DBG_VALUE to + // identify spilled location as the place to find corresponding variable's + // value. + if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) { + const MDNode *MDPtr = + DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); + int64_t Offset = 0; + if (DBG->getOperand(1).isImm()) + Offset = DBG->getOperand(1).getImm(); + DebugLoc DL; + if (MI == MBB->end()) { + // If MI is at basic block end then use last instruction's location. + MachineBasicBlock::iterator EI = MI; + DL = (--EI)->getDebugLoc(); + } + else + DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { + MachineBasicBlock *MBB = DBG->getParent(); + MBB->insert(MI, NewDV); + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); + LiveDbgValueMap[LRI->first] = NewDV; + } + } if (SpillKill) LR.LastUse = 0; // Don't kill register again } @@ -471,7 +498,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { // First try to find a completely free register. for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) && + Allocatable.test(PhysReg)) return assignVirtToPhysReg(LRE, PhysReg); } @@ -480,6 +508,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + if (!Allocatable.test(*I)) + continue; unsigned Cost = calcSpillCost(*I); // Cost is 0 when all aliases are already disabled. if (Cost == 0) @@ -520,12 +550,9 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; // It's a copy, use the destination register as a hint. if (UseMI.isCopyLike()) Hint = UseMI.getOperand(0).getReg(); - else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - Hint = DstReg; } allocVirtReg(MI, *LRI, Hint); } else if (LR.LastUse) { @@ -712,7 +739,8 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - definePhysReg(MII, *I, regReserved); + if (Allocatable.test(*I)) + definePhysReg(MII, *I, regReserved); SmallVector<unsigned, 8> VirtDead; SmallVector<MachineInstr*, 32> Coalesced; @@ -756,31 +784,43 @@ void RAFast::AllocateBasicBlock() { // Debug values are not allowed to change codegen in any way. 
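// --- Illustrative aside (editor's sketch, not part of the patch) ---
// The spillVirtReg() hunk above makes spilling debug-aware: when a virtual
// register that some DBG_VALUE describes is written to a stack slot, a
// matching DBG_VALUE pointing at the frame index is inserted so the
// variable's location stays valid. A condensed form of that logic, with
// VReg, FI and InsertPt as placeholder names:
if (MachineInstr *DBG = LiveDbgValueMap.lookup(VReg)) {
  const MDNode *Var = DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
  int64_t Offset = DBG->getOperand(1).isImm() ? DBG->getOperand(1).getImm() : 0;
  DebugLoc DL = InsertPt == MBB->end() ? prior(InsertPt)->getDebugLoc()
                                       : InsertPt->getDebugLoc();
  if (MachineInstr *NewDV =
        TII->emitFrameIndexDebugValue(*MF, FI, Offset, Var, DL))
    MBB->insert(InsertPt, NewDV);   // the debugger now looks in the stack slot
}
// --- end aside ---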
if (MI->isDebugValue()) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); - if (LRI != LiveVirtRegs.end()) - setPhysReg(MI, i, LRI->second.PhysReg); - else { - int SS = StackSlotForVirtReg[Reg]; - if (SS == -1) - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + bool ScanDbgValue = true; + while (ScanDbgValue) { + ScanDbgValue = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + LiveDbgValueMap[Reg] = MI; + LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->second.PhysReg); else { - // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) + // We can't allocate a physreg for a DebugValue, sorry! + MO.setReg(0); + else { + // Modify DBG_VALUE now that the value is in a spill slot. + int64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << + "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; + } else + // We can't allocate a physreg for a DebugValue; sorry! + MO.setReg(0); + } } } } @@ -789,14 +829,13 @@ void RAFast::AllocateBasicBlock() { } // If this is a copy, we may be able to coalesce. - unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; + unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; if (MI->isCopy()) { CopyDst = MI->getOperand(0).getReg(); CopySrc = MI->getOperand(1).getReg(); CopyDstSub = MI->getOperand(0).getSubReg(); CopySrcSub = MI->getOperand(1).getSubReg(); - } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) - CopySrc = CopyDst = 0; + } // Track registers used by instruction. UsedInInstr.reset(); @@ -843,13 +882,18 @@ void RAFast::AllocateBasicBlock() { // operands. If there are also physical defs, these registers must avoid // both physical defs and uses, making them more constrained than normal // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. // We didn't detect inline asm tied operands above, so just make this extra // pass for all inline asm. 
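// --- Illustrative aside (editor's sketch, not part of the patch) ---
// The DBG_VALUE handling earlier in this hunk uses a restart idiom: once the
// instruction under inspection has been erased and replaced, its operand list
// is stale, so the scan restarts on the replacement rather than continuing.
// The idiom in isolation; maybeReplace is a hypothetical helper standing in
// for the erase/emitFrameIndexDebugValue/insert sequence above:
bool Rescan = true;
while (Rescan) {
  Rescan = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    if (MachineInstr *Repl = maybeReplace(MI, i)) {
      MI = Repl;      // rescan the replacement from operand 0
      Rescan = true;
      break;
    }
  }
}
// --- end aside ---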
if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && hasPhysDefs)) { + (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. CopyDst = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; } // Second scan. @@ -870,14 +914,17 @@ void RAFast::AllocateBasicBlock() { MRI->addPhysRegsUsed(UsedInInstr); - // Track registers defined by instruction - early clobbers at this point. + // Track registers defined by instruction - early clobbers and tied uses at + // this point. UsedInInstr.reset(); if (hasEarlyClobbers) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) continue; + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // Look for physreg defs and tied uses. + if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; UsedInInstr.set(Reg); for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) UsedInInstr.set(*AS); @@ -887,9 +934,9 @@ void RAFast::AllocateBasicBlock() { unsigned DefOpEnd = MI->getNumOperands(); if (TID.isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an - // exception is thrown, the landing pad is going to expect to find registers - // in their spill slots, and 2. we don't have to wade through all the - // <imp-def> operands on the call instruction. + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots, and 2. we don't have to wade through + // all the <imp-def> operands on the call instruction. DefOpEnd = VirtOpEnd; DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); @@ -992,6 +1039,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { SkippedInstrs.clear(); StackSlotForVirtReg.clear(); + LiveDbgValueMap.clear(); return true; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 044672d6d7a5e..5c62354a8872c 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -87,10 +87,10 @@ namespace { "to skip."), cl::init(0), cl::Hidden); - + struct RALinScan : public MachineFunctionPass { static char ID; - RALinScan() : MachineFunctionPass(&ID) { + RALinScan() : MachineFunctionPass(ID) { // Initialize the queue to record recently-used registers. if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); @@ -125,9 +125,10 @@ namespace { const TargetRegisterInfo* tri_; const TargetInstrInfo* tii_; BitVector allocatableRegs_; + BitVector reservedRegs_; LiveIntervals* li_; LiveStacks* ls_; - const MachineLoopInfo *loopInfo; + MachineLoopInfo *loopInfo; /// handled_ - Intervals are added to the handled_ set in the order of their /// start value. This is uses for backtracking. @@ -255,9 +256,9 @@ namespace { SmallVector<LiveInterval*, 8> &SpillIntervals); /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try allocate the definition the same register as the source register - /// if the register is not defined during live time of the interval. This - /// eliminate a copy. 
This is used to coalesce copies which were not + /// try to allocate the definition to the same register as the source, + /// if the register is not defined during the life time of the interval. + /// This eliminates a copy, and is used to coalesce copies which were not /// coalesced away before allocation either due to dest and src being in /// different register classes or because the coalescer was overly /// conservative. @@ -335,6 +336,17 @@ namespace { SmallVector<unsigned, 256> &inactiveCounts, bool SkipDGRegs); + /// getFirstNonReservedPhysReg - return the first non-reserved physical + /// register in the register class. + unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { + TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); + while (i != aoe && reservedRegs_.test(*i)) + ++i; + assert(i != aoe && "All registers reserved?!"); + return *i; + } + void ComputeRelatedRegClasses(); template <typename ItTy> @@ -358,8 +370,8 @@ namespace { char RALinScan::ID = 0; } -static RegisterPass<RALinScan> -X("linearscan-regalloc", "Linear Scan Register Allocator"); +INITIALIZE_PASS(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false); void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -371,7 +383,7 @@ void RALinScan::ComputeRelatedRegClasses() { for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); I != E; ++I) { HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - + const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; if (PRC) { // Already processed this register. Just make sure we know that @@ -382,7 +394,7 @@ void RALinScan::ComputeRelatedRegClasses() { } } } - + // Second pass, now that we know conservatively what register classes each reg // belongs to, add info about aliases. We don't need to do this for targets // without register aliases. @@ -419,20 +431,15 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned CandReg; { MachineInstr *CopyMI; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (vni->def != SlotIndex() && vni->isDefAccurate() && - (CopyMI = li_->getInstructionFromIndex(vni->def)) && - (CopyMI->isCopy() || - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))) + (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->isCopy() ? 
CopyMI->getOperand(1).getReg() : SrcReg; + CandReg = CopyMI->getOperand(1).getReg(); else if (TrivCoalesceEnds && - (CopyMI = - li_->getInstructionFromIndex(range.end.getBaseIndex())) && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - cur.reg == SrcReg) + (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && + CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) // Only used by a copy, try to extend DstReg backwards - CandReg = DstReg; + CandReg = CopyMI->getOperand(0).getReg(); else return Reg; } @@ -469,6 +476,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); allocatableRegs_ = tri_->getAllocatableSet(fn); + reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); ls_ = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); @@ -487,9 +495,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { vrm_ = &getAnalysis<VirtRegMap>(); if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_)); - + + spiller_.reset(createSpiller(*this, *mf_, *vrm_)); + initIntervalSets(); linearScan(); @@ -543,7 +551,7 @@ void RALinScan::linearScan() { // linear scan algorithm DEBUG({ dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " + << "********** Function: " << mf_->getFunction()->getName() << '\n'; printIntervals("fixed", fixed_.begin(), fixed_.end()); }); @@ -765,7 +773,8 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { return IP.end(); } -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){ +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, + SlotIndex Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), @@ -804,7 +813,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, static float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, MachineRegisterInfo *mri_, - const MachineLoopInfo *loopInfo) { + MachineLoopInfo *loopInfo) { float Conflicts = 0; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), E = mri_->reg_end(); I != E; ++I) { @@ -837,7 +846,7 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, dbgs() << tri_->getName(Candidates[i].first) << " "; dbgs() << "\n"; }); - + // Calculate the number of conflicts of each candidate. for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { unsigned Reg = i->first->reg; @@ -955,7 +964,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (cur->empty()) { unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) - physReg = *RC->allocation_order_begin(*mf_); + physReg = getFirstNonReservedPhysReg(RC); DEBUG(dbgs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. 
vrm_->assignVirt2Phys(cur->reg, physReg); @@ -978,27 +987,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if ((vni->def != SlotIndex()) && !vni->isUnused() && vni->isDefAccurate()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (CopyMI && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) { - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } else if (CopyMI && CopyMI->isCopy()) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubReg = CopyMI->getOperand(0).getSubReg(); - SrcReg = CopyMI->getOperand(1).getReg(); - SrcSubReg = CopyMI->getOperand(1).getSubReg(); + if (CopyMI && CopyMI->isCopy()) { + unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); + unsigned SrcReg = CopyMI->getOperand(1).getReg(); + unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); unsigned Reg = 0; if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) Reg = SrcReg; @@ -1024,7 +1016,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only allocate virtual registers!"); const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, + // If this is not in a related reg class to the register we're allocating, // don't check it. if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && cur->overlapsFrom(*i->first, i->second-1)) { @@ -1033,7 +1025,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); } } - + // Speculatively check to see if we can get a register right now. If not, // we know we won't be able to by adding more constraints. If so, we can // check to see if it is valid. Doing an exhaustive search of the fixed_ list @@ -1048,7 +1040,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { SmallSet<unsigned, 8> RegAliases; for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) RegAliases.insert(*AS); - + bool ConflictsWithFixed = false; for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { IntervalPtr &IP = fixed_[i]; @@ -1068,7 +1060,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } } } - + // Okay, the register picked by our speculative getFreePhysReg call turned // out to be in use. Actually add all of the conflicting fixed registers to // regUse_ so we can do an accurate query. @@ -1080,7 +1072,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { LiveInterval *I = IP.first; const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && + if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; @@ -1099,11 +1091,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { physReg = getFreePhysReg(cur); } } - + // Restore the physical register tracker, removing information about the // future. 
restoreRegUses(); - + // If we find a free register, we are done: assign this virtual to // the free physical register and add this interval to the active // list. @@ -1118,7 +1110,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. + // the next reload from the same SS is allocated. mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); DowngradeRegister(cur, physReg); } @@ -1131,7 +1123,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { for (std::vector<std::pair<unsigned, float> >::iterator I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) updateSpillWeights(SpillWeights, I->first, I->second, RC); - + // for each interval in active, update spill weights. for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); i != e; ++i) { @@ -1141,7 +1133,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { reg = vrm_->getPhys(reg); updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - + DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. @@ -1155,17 +1147,22 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; float regWeight = SpillWeights[reg]; - // Skip recently allocated registers. + // Don't even consider reserved regs. + if (reservedRegs_.test(reg)) + continue; + // Skip recently allocated registers and reserved registers. if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; RegsWeights.push_back(std::make_pair(reg, regWeight)); } - + // If we didn't find a register that is spillable, try aliases? if (!Found) { for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_), e = RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; + if (reservedRegs_.test(reg)) + continue; // No need to worry about if the alias register size < regsize of RC. // We are going to spill all registers that alias it anyway. for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) @@ -1179,7 +1176,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { minWeight = RegsWeights[0].second; if (minWeight == HUGE_VALF) { // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_); + minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); if (cur->weight == HUGE_VALF || li_->getApproximateInstructionCount(*cur) == 0) { // Spill a physical register around defs and uses. @@ -1224,8 +1221,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // linearscan. 
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> spillIs; - std::vector<LiveInterval*> added; + SmallVector<LiveInterval*, 8> spillIs, added; spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); @@ -1288,27 +1284,33 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); + assert(!spillIs.empty() && "No spill intervals?"); SlotIndex earliestStart = spillIs[0]->beginIndex(); - + // Spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the // current interval as the rest do not affect its allocation. we also keep // track of the earliest start of all spilled live intervals since this will // mark our rollback point. - std::vector<LiveInterval*> added; + SmallVector<LiveInterval*, 8> added; while (!spillIs.empty()) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - - spiller_->spill(sli, added, spillIs, &earliestStart); + spiller_->spill(sli, added, spillIs); addStackInterval(sli, ls_, li_, mri_, *vrm_); spilled.insert(sli->reg); } + // Include any added intervals in earliestStart. + for (unsigned i = 0, e = added.size(); i != e; ++i) { + SlotIndex SI = added[i]->beginIndex(); + if (SI < earliestStart) + earliestStart = SI; + } + DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a @@ -1431,6 +1433,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; + // Skip reserved registers. + if (reservedRegs_.test(Reg)) + continue; // Skip recently allocated registers. if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { FreeReg = Reg; @@ -1459,6 +1464,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; + // Skip reserved registers. + if (reservedRegs_.test(Reg)) + continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { FreeReg = Reg; @@ -1479,17 +1487,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { SmallVector<unsigned, 256> inactiveCounts; unsigned MaxInactiveCount = 0; - + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - + for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); i != e; ++i) { unsigned reg = i->first->reg; assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); - // If this is not in a related reg class to the register we're allocating, + // If this is not in a related reg class to the register we're allocating, // don't check it. 
     const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
     if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
@@ -1506,7 +1514,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
   unsigned Preference = vrm_->getRegAllocPref(cur->reg);
   if (Preference) {
     DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
-    if (isRegAvail(Preference) && 
+    if (isRegAvail(Preference) &&
         RC->contains(Preference))
       return Preference;
   }
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 7e61a12a7eea5..61f337bab49c4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,8 @@
 #include "PBQP/HeuristicSolver.h"
 #include "PBQP/Graph.h"
 #include "PBQP/Heuristics/Briggs.h"
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
@@ -65,6 +67,11 @@
 pbqpCoalescing("pbqp-coalescing",
                cl::desc("Attempt coalescing during PBQP register allocation."),
                cl::init(false), cl::Hidden);

+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+                 cl::desc("Pre-split before PBQP register allocation."),
+                 cl::init(false), cl::Hidden);
+
 namespace {

   ///
@@ -77,7 +84,7 @@ namespace {
     static char ID;

     /// Construct a PBQP register allocator.
-    PBQPRegAlloc() : MachineFunctionPass(&ID) {}
+    PBQPRegAlloc() : MachineFunctionPass(ID) {}

     /// Return the pass name.
     virtual const char* getPassName() const {
@@ -96,7 +103,10 @@ namespace {
       au.addPreserved<LiveStacks>();
       au.addRequired<MachineLoopInfo>();
       au.addPreserved<MachineLoopInfo>();
+      if (pbqpPreSplitting)
+        au.addRequired<LoopSplitter>();
       au.addRequired<VirtRegMap>();
+      au.addRequired<RenderMachineFunction>();
       MachineFunctionPass::getAnalysisUsage(au);
     }

@@ -104,7 +114,15 @@ namespace {
     virtual bool runOnMachineFunction(MachineFunction &MF);

   private:
-    typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+
+    class LIOrdering {
+    public:
+      bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+        return li1->reg < li2->reg;
+      }
+    };
+
+    typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
     typedef std::vector<const LiveInterval*> Node2LIMap;
     typedef std::vector<unsigned> AllowedSet;
     typedef std::vector<AllowedSet> AllowedSetMap;
@@ -112,7 +130,7 @@ namespace {
     typedef std::pair<unsigned, unsigned> RegPair;
     typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;

-    typedef std::set<LiveInterval*> LiveIntervalSet;
+    typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;

     typedef std::vector<PBQP::Graph::NodeItr> NodeVector;

@@ -122,6 +140,7 @@ namespace {
     const TargetInstrInfo *tii;
     const MachineLoopInfo *loopInfo;
     MachineRegisterInfo *mri;
+    RenderMachineFunction *rmf;

     LiveIntervals *lis;
     LiveStacks *lss;
@@ -379,12 +398,14 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
          iItr != iEnd; ++iItr) {

       const MachineInstr *instr = &*iItr;

-      unsigned srcReg, dstReg, srcSubReg, dstSubReg;

       // If this isn't a copy then continue to the next instruction.
-      if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+      if (!instr->isCopy())
         continue;

+      unsigned srcReg = instr->getOperand(1).getReg();
+      unsigned dstReg = instr->getOperand(0).getReg();
+
       // If the registers are already the same our job is nice and easy.
       if (dstReg == srcReg)
         continue;

@@ -567,6 +588,8 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {

   // Resize allowedSets container appropriately.
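// --- Illustrative aside (editor's sketch, not part of the patch) ---
// The LIOrdering comparator introduced above keys the node maps and sets by
// virtual register number instead of by LiveInterval* address, presumably so
// that iteration order, and hence PBQP graph construction, is the same on
// every run. Usage sketch; li1 and li2 are placeholder intervals:
LiveIntervalSet vregIntervals;   // std::set<LiveInterval*, LIOrdering>
vregIntervals.insert(li1);
vregIntervals.insert(li2);
// Iteration now visits intervals in ascending li->reg order, independent of
// where the LiveInterval objects happen to be allocated on the heap.
// --- end aside ---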
allowedSets.resize(vregIntervalsToAlloc.size()); + BitVector ReservedRegs = tri->getReservedRegs(*mf); + // Iterate over virtual register intervals to compute allowed sets... for (unsigned node = 0; node < node2LI.size(); ++node) { @@ -575,8 +598,12 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); // Start by assuming all allocable registers in the class are allowed... - RegVector liAllowed(liRC->allocation_order_begin(*mf), - liRC->allocation_order_end(*mf)); + RegVector liAllowed; + TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf); + TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf); + for (TargetRegisterClass::iterator it = aob; it != aoe; ++it) + if (!ReservedRegs.test(*it)) + liAllowed.push_back(*it); // Eliminate the physical registers which overlap with this range, along // with all their aliases. @@ -735,9 +762,11 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { const LiveInterval *spillInterval = node2LI[node]; double oldSpillWeight = spillInterval->weight; SmallVector<LiveInterval*, 8> spillIs; + rmf->rememberUseDefs(spillInterval); std::vector<LiveInterval*> newSpills = lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); addStackInterval(spillInterval, mri); + rmf->rememberSpills(spillInterval, newSpills); (void) oldSpillWeight; DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: " @@ -845,9 +874,11 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis<LiveIntervals>(); lss = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); + rmf = &getAnalysis<RenderMachineFunction>(); vrm = &getAnalysis<VirtRegMap>(); + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: @@ -884,6 +915,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { // Finalise allocation, allocate empty ranges. finalizeAlloc(); + rmf->renderMachineFunction("After PBQP register allocation.", vrm); + vregIntervalsToAlloc.clear(); emptyVRegIntervals.clear(); li2Node.clear(); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index ab0bc2d78a608..02b5539f0f4f0 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -54,9 +54,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI, DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); Src = MI->getOperand(2).getReg(); SrcSub = MI->getOperand(2).getSubReg(); - } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) { + } else return false; - } return true; } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 43b3fb6426351..a2580b85bcc33 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -21,7 +21,9 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -226,19 +228,14 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { used = ~RegsAvailable & ~ReservedRegs; } -/// CreateRegClassMask - Set the bits that represent the registers in the -/// TargetRegisterClass. 
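// --- Illustrative aside (editor's sketch, not part of the patch) ---
// A recurring theme in this patch: registers reported by
// TargetRegisterInfo::getReservedRegs() must never be handed out. PBQP's
// allowed sets above, linear scan's candidate loops, and the scavenger below
// all apply the same filter up front. The common shape, with TRI, MF and RC
// as stand-ins for the locals used at each site:
BitVector Reserved = TRI->getReservedRegs(MF);
SmallVector<unsigned, 32> Candidates;
for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
       E = RC->allocation_order_end(MF); I != E; ++I)
  if (!Reserved.test(*I))
    Candidates.push_back(*I);   // allocatable and not reserved
// --- end aside ---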
-static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; - ++I) - Mask.set(*I); -} - unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (!isAliasUsed(*I)) + if (!isAliasUsed(*I)) { + DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << + "\n"); return *I; + } return 0; } @@ -325,11 +322,9 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector Candidates(NumPhysRegs, false); - CreateRegClassMask(RC, Candidates); - // Do not include reserved registers. - Candidates ^= ReservedRegs & Candidates; + // Consider all allocatable registers in the register class initially + BitVector Candidates = + TRI->getAllocatableSet(*I->getParent()->getParent(), RC); // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { @@ -349,8 +344,10 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. - if (!isAliasUsed(SReg)) + if (!isAliasUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); return SReg; + } assert(ScavengedReg == 0 && "Scavenger slot is live, unable to scavenge another register!"); @@ -366,12 +363,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, "Cannot scavenge register without an emergency spill slot!"); TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, NULL, this); + TRI->eliminateFrameIndex(II, SPAdj, this); // Restore the scavenged register before its use (or first terminator). TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); II = prior(UseMI); - TRI->eliminateFrameIndex(II, SPAdj, NULL, this); + TRI->eliminateFrameIndex(II, SPAdj, this); } ScavengeRestore = prior(UseMI); @@ -380,5 +377,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // ScavengedReg = SReg; ScavengedRC = RC; + DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << + "\n"); + return SReg; } diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp new file mode 100644 index 0000000000000..93426eecbbc1e --- /dev/null +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -0,0 +1,1014 @@ +//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+                "Render machine functions (and related info) to HTML pages",
+                false, false);
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+                 cl::desc("Appended to function name to get output file name "
+                          "(default: \".html\")"),
+                 cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+                     cl::desc("Comma separated list of functions to render"
+                              ", or \"*\"."),
+                     cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+                cl::desc("Register classes to render pressure for."),
+                cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+              cl::desc("Live intervals to show alongside code."),
+              cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+            cl::desc("Don't display empty intervals."),
+            cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+                 cl::desc("Render indexes not associated with instructions or "
+                          "MBB starts."),
+                 cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+                  cl::desc("Use SVG for vertical text."),
+                  cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+           cl::desc("Pretty print HTML. 
For debugging the renderer only.."), + cl::init(false), cl::Hidden); + + +namespace llvm { + + bool MFRenderingOptions::renderingOptionsProcessed; + std::set<std::string> MFRenderingOptions::mfNamesToRender; + bool MFRenderingOptions::renderAllMFs = false; + + std::set<std::string> MFRenderingOptions::classNamesToRender; + bool MFRenderingOptions::renderAllClasses = false; + + std::set<std::pair<unsigned, unsigned> > + MFRenderingOptions::intervalNumsToRender; + unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly; + + template <typename OutputItr> + void MFRenderingOptions::splitComaSeperatedList(const std::string &s, + OutputItr outItr) { + std::string::const_iterator curPos = s.begin(); + std::string::const_iterator nextComa = std::find(curPos, s.end(), ','); + while (nextComa != s.end()) { + std::string elem; + std::copy(curPos, nextComa, std::back_inserter(elem)); + *outItr = elem; + ++outItr; + curPos = llvm::next(nextComa); + nextComa = std::find(curPos, s.end(), ','); + } + + if (curPos != s.end()) { + std::string elem; + std::copy(curPos, s.end(), std::back_inserter(elem)); + *outItr = elem; + ++outItr; + } + } + + void MFRenderingOptions::processOptions() { + if (!renderingOptionsProcessed) { + processFuncNames(); + processRegClassNames(); + processIntervalNumbers(); + renderingOptionsProcessed = true; + } + } + + void MFRenderingOptions::processFuncNames() { + if (machineFuncsToRender == "*") { + renderAllMFs = true; + } else { + splitComaSeperatedList(machineFuncsToRender, + std::inserter(mfNamesToRender, + mfNamesToRender.begin())); + } + } + + void MFRenderingOptions::processRegClassNames() { + if (pressureClasses == "*") { + renderAllClasses = true; + } else { + splitComaSeperatedList(pressureClasses, + std::inserter(classNamesToRender, + classNamesToRender.begin())); + } + } + + void MFRenderingOptions::processIntervalNumbers() { + std::set<std::string> intervalRanges; + splitComaSeperatedList(showIntervals, + std::inserter(intervalRanges, + intervalRanges.begin())); + std::for_each(intervalRanges.begin(), intervalRanges.end(), + processIntervalRange); + } + + void MFRenderingOptions::processIntervalRange( + const std::string &intervalRangeStr) { + if (intervalRangeStr == "*") { + intervalTypesToRender |= All; + } else if (intervalRangeStr == "virt-nospills*") { + intervalTypesToRender |= VirtNoSpills; + } else if (intervalRangeStr == "spills*") { + intervalTypesToRender |= VirtSpills; + } else if (intervalRangeStr == "virt*") { + intervalTypesToRender |= AllVirt; + } else if (intervalRangeStr == "phys*") { + intervalTypesToRender |= AllPhys; + } else { + std::istringstream iss(intervalRangeStr); + unsigned reg1, reg2; + if ((iss >> reg1 >> std::ws)) { + if (iss.eof()) { + intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1)); + } else { + char c; + iss >> c; + if (c == '-' && (iss >> reg2)) { + intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1)); + } else { + dbgs() << "Warning: Invalid interval range \"" + << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n"; + } + } + } else { + dbgs() << "Warning: Invalid interval number \"" + << intervalRangeStr << "\" in -rmf-intervals. 
Skipping.\n"; + } + } + } + + void MFRenderingOptions::setup(MachineFunction *mf, + const TargetRegisterInfo *tri, + LiveIntervals *lis, + const RenderMachineFunction *rmf) { + this->mf = mf; + this->tri = tri; + this->lis = lis; + this->rmf = rmf; + + clear(); + } + + void MFRenderingOptions::clear() { + regClassesTranslatedToCurrentFunction = false; + regClassSet.clear(); + + intervalsTranslatedToCurrentFunction = false; + intervalSet.clear(); + } + + void MFRenderingOptions::resetRenderSpecificOptions() { + intervalSet.clear(); + intervalsTranslatedToCurrentFunction = false; + } + + bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const { + processOptions(); + + return (renderAllMFs || + mfNamesToRender.find(mf->getFunction()->getName()) != + mfNamesToRender.end()); + } + + const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{ + translateRegClassNamesToCurrentFunction(); + return regClassSet; + } + + const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const { + translateIntervalNumbersToCurrentFunction(); + return intervalSet; + } + + bool MFRenderingOptions::renderEmptyIndexes() const { + return showEmptyIndexes; + } + + bool MFRenderingOptions::fancyVerticals() const { + return useFancyVerticals; + } + + void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const { + if (!regClassesTranslatedToCurrentFunction) { + processOptions(); + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + if (renderAllClasses || + classNamesToRender.find(trc->getName()) != + classNamesToRender.end()) { + regClassSet.insert(trc); + } + } + regClassesTranslatedToCurrentFunction = true; + } + } + + void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const { + if (!intervalsTranslatedToCurrentFunction) { + processOptions(); + + // If we're not just doing explicit then do a copy over all matching + // types. + if (intervalTypesToRender != ExplicitOnly) { + for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (filterEmpty && li->empty()) + continue; + + if ((TargetRegisterInfo::isPhysicalRegister(li->reg) && + (intervalTypesToRender & AllPhys))) { + intervalSet.insert(li); + } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) { + if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) || + ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) { + intervalSet.insert(li); + } + } + } + } + + // If we need to process the explicit list... 
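// --- Illustrative aside (editor's sketch, not part of the patch) ---
// The explicit list processed below comes from processIntervalRange() above,
// which accepts either "N" or "N-M" (inclusive) and stores half-open ranges.
// A self-contained restatement of that parse, assuming the usual <sstream>,
// <string> and <utility> headers:
static bool parseIntervalRange(const std::string &s,
                               std::pair<unsigned, unsigned> &out) {
  std::istringstream iss(s);
  unsigned lo, hi;
  char dash;
  if (!(iss >> lo))
    return false;                       // not a number at all
  if ((iss >> std::ws).eof()) {
    out = std::make_pair(lo, lo + 1);   // single interval "N"
    return true;
  }
  if ((iss >> dash) && dash == '-' && (iss >> hi)) {
    out = std::make_pair(lo, hi + 1);   // range "N-M", stored half-open
    return true;
  }
  return false;
}
// --- end aside ---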
+ if (intervalTypesToRender != All) { + for (std::set<std::pair<unsigned, unsigned> >::const_iterator + regRangeItr = intervalNumsToRender.begin(), + regRangeEnd = intervalNumsToRender.end(); + regRangeItr != regRangeEnd; ++regRangeItr) { + const std::pair<unsigned, unsigned> &range = *regRangeItr; + for (unsigned reg = range.first; reg != range.second; ++reg) { + if (lis->hasInterval(reg)) { + intervalSet.insert(&lis->getInterval(reg)); + } + } + } + } + + intervalsTranslatedToCurrentFunction = true; + } + } + + // ---------- TargetRegisterExtraInformation implementation ---------- + + TargetRegisterExtraInfo::TargetRegisterExtraInfo() + : mapsPopulated(false) { + } + + void TargetRegisterExtraInfo::setup(MachineFunction *mf, + MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, + LiveIntervals *lis) { + this->mf = mf; + this->mri = mri; + this->tri = tri; + this->lis = lis; + } + + void TargetRegisterExtraInfo::reset() { + if (!mapsPopulated) { + initWorst(); + //initBounds(); + initCapacity(); + mapsPopulated = true; + } + + resetPressureAndLiveStates(); + } + + void TargetRegisterExtraInfo::clear() { + prWorst.clear(); + vrWorst.clear(); + capacityMap.clear(); + pressureMap.clear(); + //liveStatesMap.clear(); + mapsPopulated = false; + } + + void TargetRegisterExtraInfo::initWorst() { + assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && + "Worst map already initialised?"); + + // Start with the physical registers. + for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { + WorstMapLine &pregLine = prWorst[preg]; + + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + + unsigned numOverlaps = 0; + for (TargetRegisterClass::iterator rItr = trc->begin(), + rEnd = trc->end(); + rItr != rEnd; ++rItr) { + unsigned trcPReg = *rItr; + if (tri->regsOverlap(preg, trcPReg)) + ++numOverlaps; + } + + pregLine[trc] = numOverlaps; + } + } + + // Now the register classes. 
+    for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+                                               rcEnd = tri->regclass_end();
+         rc1Itr != rcEnd; ++rc1Itr) {
+      const TargetRegisterClass *trc1 = *rc1Itr;
+      WorstMapLine &classLine = vrWorst[trc1];
+
+      for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+           rc2Itr != rcEnd; ++rc2Itr) {
+        const TargetRegisterClass *trc2 = *rc2Itr;
+
+        unsigned worst = 0;
+
+        for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+                                           trc1End = trc1->end();
+             trc1Itr != trc1End; ++trc1Itr) {
+          unsigned trc1Reg = *trc1Itr;
+          unsigned trc1RegWorst = 0;
+
+          for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+                                             trc2End = trc2->end();
+               trc2Itr != trc2End; ++trc2Itr) {
+            unsigned trc2Reg = *trc2Itr;
+            if (tri->regsOverlap(trc1Reg, trc2Reg))
+              ++trc1RegWorst;
+          }
+          if (trc1RegWorst > worst) {
+            worst = trc1RegWorst;
+          }
+        }
+
+        if (worst != 0) {
+          classLine[trc2] = worst;
+        }
+      }
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getWorst(
+                                        unsigned reg,
+                                        const TargetRegisterClass *trc) const {
+    const WorstMapLine *wml = 0;
+    if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+      PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+      assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+      wml = &prwItr->second;
+    } else {
+      const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+      VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+      assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+      wml = &vrwItr->second;
+    }
+
+    WorstMapLine::const_iterator wmlItr = wml->find(trc);
+    if (wmlItr == wml->end())
+      return 0;
+
+    return wmlItr->second;
+  }
+
+  void TargetRegisterExtraInfo::initCapacity() {
+    assert(!mapsPopulated && capacityMap.empty() &&
+           "Capacity map already initialised?");
+
+    for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+         rcEnd = tri->regclass_end();
+         rcItr != rcEnd; ++rcItr) {
+      const TargetRegisterClass *trc = *rcItr;
+      unsigned capacity = std::distance(trc->allocation_order_begin(*mf),
+                                        trc->allocation_order_end(*mf));
+
+      if (capacity != 0)
+        capacityMap[trc] = capacity;
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getCapacity(
+                                         const TargetRegisterClass *trc) const {
+    CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+    assert(cmItr != capacityMap.end() &&
+           "vreg with unallocable register class");
+    return cmItr->second;
+  }
+
+  void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+    pressureMap.clear();
+    //liveStatesMap.clear();
+
+    // Iterate over all slots.
+
+
+    // Iterate over all live intervals.
+    for (LiveIntervals::iterator liItr = lis->begin(),
+                                 liEnd = lis->end();
+         liItr != liEnd; ++liItr) {
+      LiveInterval *li = liItr->second;
+
+      const TargetRegisterClass *liTRC;
+
+      if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+        continue;
+
+      liTRC = mri->getRegClass(li->reg);
+
+
+      // For all ranges in the current interval.
+      for (LiveInterval::iterator lrItr = li->begin(),
+             lrEnd = li->end();
+           lrItr != lrEnd; ++lrItr) {
+        LiveRange *lr = &*lrItr;
+
+        // For all slots in the current range.
+        for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+          // Record increased pressure at index for all overlapping classes.
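// --- Illustrative aside (editor's note, not part of the patch) ---
// The loop that follows implements a worst-case pressure model. For a slot
// index i and a register class C it accumulates
//
//   pressure(i, C) = sum over intervals li live at i of worst(reg(li), C)
//
// where worst(r, C) is the largest number of registers of C that r (or any
// register of r's class, for virtual registers) can overlap, as computed by
// initWorst() above. getPressureAtSlot() then compares this value against
// capacity(C), the size of C's allocation order, and
// classOverCapacityAtSlot() flags slots where pressure(i, C) > capacity(C).
// --- end aside ---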
+          for (TargetRegisterInfo::regclass_iterator
+                 rcItr = tri->regclass_begin(),
+                 rcEnd = tri->regclass_end();
+               rcItr != rcEnd; ++rcItr) {
+            const TargetRegisterClass *trc = *rcItr;
+
+            if (trc->allocation_order_begin(*mf) ==
+                trc->allocation_order_end(*mf))
+              continue;
+
+            unsigned worstAtI = getWorst(li->reg, trc);
+
+            if (worstAtI != 0) {
+              pressureMap[i][trc] += worstAtI;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+                                                 const TargetRegisterClass *trc,
+                                                 SlotIndex i) const {
+    PressureMap::const_iterator pmItr = pressureMap.find(i);
+    if (pmItr == pressureMap.end())
+      return 0;
+    const PressureMapLine &pmLine = pmItr->second;
+    PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+    if (pmlItr == pmLine.end())
+      return 0;
+    return pmlItr->second;
+  }
+
+  bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+                                                 const TargetRegisterClass *trc,
+                                                 SlotIndex i) const {
+    return (getPressureAtSlot(trc, i) > getCapacity(trc));
+  }
+
+  // ---------- MachineFunctionRenderer implementation ----------
+
+  void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+    if (!prettyHTML)
+      return;
+    for (unsigned i = 0; i < ns; ++i) {
+      os << " ";
+    }
+  }
+
+  RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+    return Spacer(ns);
+  }
+
+  raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
+    s.print(os);
+    return os;
+  }
+
+  template <typename Iterator>
+  std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
+    std::string r;
+
+    for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+      char c = *sItr;
+
+      switch (c) {
+        case '<': r.append("&lt;"); break;
+        case '>': r.append("&gt;"); break;
+        case '&': r.append("&amp;"); break;
+        case ' ': r.append("&nbsp;"); break;
+        case '\"': r.append("&quot;"); break;
+        default: r.push_back(c); break;
+      }
+    }
+
+    return r;
+  }
+
+  RenderMachineFunction::LiveState
+  RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+                                        SlotIndex i) const {
+    const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+    // For uses/defs recorded use/def indexes override current liveness and
+    // instruction operands (Only for the interval which records the indexes).
+    if (i.isUse() || i.isDef()) {
+      UseDefs::const_iterator udItr = useDefs.find(li);
+      if (udItr != useDefs.end()) {
+        const SlotSet &slotSet = udItr->second;
+        if (slotSet.count(i)) {
+          if (i.isUse()) {
+            return Used;
+          }
+          // else
+          return Defined;
+        }
+      }
+    }
+
+    // If the slot is a load/store, or there's no info in the use/def set then
+    // use liveness and instruction operand info.
+    if (li->liveAt(i)) {
+
+      if (mi == 0) {
+        if (vrm == 0 ||
+            (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+          return AliveReg;
+        } else {
+          return AliveStack;
+        }
+      } else {
+        if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+          return Defined;
+        } else if (i.isUse() && mi->readsRegister(li->reg)) {
+          return Used;
+        } else {
+          if (vrm == 0 ||
+              (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+            return AliveReg;
+          } else {
+            return AliveStack;
+          }
+        }
+      }
+    }
+    return Dead;
+  }
+
+  RenderMachineFunction::PressureState
+  RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+                                            SlotIndex i) const {
+    if (trei.getPressureAtSlot(trc, i) == 0) {
+      return Zero;
+    } else if (trei.classOverCapacityAtSlot(trc, i)){
+      return High;
+    }
+    return Low;
+  }
+
+  /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const { + std::string s; + raw_string_ostream oss(s); + oss << *mi; + + os << escapeChars(oss.str()); + } + + template <typename T> + void RenderMachineFunction::renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const { + if (ro.fancyVerticals()) { + os << indent << "<object\n" + << indent + s(2) << "class=\"obj\"\n" + << indent + s(2) << "type=\"image/svg+xml\"\n" + << indent + s(2) << "width=\"14px\"\n" + << indent + s(2) << "height=\"55px\"\n" + << indent + s(2) << "data=\"data:image/svg+xml,\n" + << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n" + << indent + s(6) << "<text x='-55' y='10' " + "font-family='Courier' font-size='12' " + "transform='rotate(-90)' " + "text-rendering='optimizeSpeed' " + "fill='#000'>" << t << "</text>\n" + << indent + s(4) << "</svg>\">\n" + << indent << "</object>\n"; + } else { + std::ostringstream oss; + oss << t; + std::string tStr(oss.str()); + + os << indent; + for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); + tStrItr != tStrEnd; ++tStrItr) { + os << *tStrItr << "<br/>"; + } + os << "\n"; + } + } + + void RenderMachineFunction::insertCSS(const Spacer &indent, + raw_ostream &os) const { + os << indent << "<style type=\"text/css\">\n" + << indent + s(2) << "body { font-color: black; }\n" + << indent + s(2) << "table.code td { font-family: monospace; " + "border-width: 0px; border-style: solid; " + "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n" + << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n" + << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n" + << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n" + << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n" + << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n" + << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n" + << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n" + << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n" + << indent + s(2) << "table.code th { border-width: 0px; " + "border-style: solid; }\n" + << indent << "</style>\n"; + } + + void RenderMachineFunction::renderFunctionSummary( + const Spacer &indent, raw_ostream &os, + const char * const renderContextStr) const { + os << indent << "<h1>Function: " << mf->getFunction()->getName() + << "</h1>\n" + << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n"; + } + + + void RenderMachineFunction::renderPressureTableLegend( + const Spacer &indent, + raw_ostream &os) const { + os << indent << "<h2>Rendering Pressure Legend:</h2>\n" + << indent << "<table class=\"code\">\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<th>Pressure</th><th>Description</th>" + "<th>Appearance</th>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>No Pressure</td>" + "<td>No physical registers of this class requested.</td>" + "<td class=\"p-z\"> </td>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>Low Pressure</td>" + "<td>Sufficient physical registers to meet demand.</td>" + "<td class=\"p-l\"> </td>\n" + << indent + s(2) << "</tr>\n" + << indent + s(2) << "<tr>\n" + << indent + s(4) << "<td>High Pressure</td>" + "<td>Potentially insufficient physical registers to meet demand.</td>" + "<td class=\"p-h\"> </td>\n" + << indent + 
s(2) << "</tr>\n" + << indent << "</table>\n"; + } + + template <typename CellType> + void RenderMachineFunction::renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair<CellType, unsigned> &rleAccumulator, + const std::map<CellType, std::string> &cellTypeStrs) const { + + if (rleAccumulator.second == 0) + return; + + typename std::map<CellType, std::string>::const_iterator ctsItr = + cellTypeStrs.find(rleAccumulator.first); + + assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); + + os << indent + s(4) << "<td class=\"" << ctsItr->second << "\""; + if (rleAccumulator.second > 1) + os << " colspan=" << rleAccumulator.second; + os << "></td>\n"; + } + + + void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const { + + std::map<LiveState, std::string> lsStrs; + lsStrs[Dead] = "l-n"; + lsStrs[Defined] = "l-d"; + lsStrs[Used] = "l-u"; + lsStrs[AliveReg] = "l-r"; + lsStrs[AliveStack] = "l-s"; + + std::map<PressureState, std::string> psStrs; + psStrs[Zero] = "p-z"; + psStrs[Low] = "p-l"; + psStrs[High] = "p-h"; + + // Open the table... + + os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n" + << indent + s(2) << "<tr>\n"; + + // Render the header row... + + os << indent + s(4) << "<th>index</th>\n" + << indent + s(4) << "<th>instr</th>\n"; + + // Render class names if necessary... + if (!ro.regClasses().empty()) { + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + os << indent + s(4) << "<th>\n"; + renderVertical(indent + s(6), os, trc->getName()); + os << indent + s(4) << "</th>\n"; + } + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? + if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "<th> </th>\n"; + + // Render interval numbers if necessary... + if (!ro.intervals().empty()) { + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = *liItr; + os << indent + s(4) << "<th>\n"; + renderVertical(indent + s(6), os, li->reg); + os << indent + s(4) << "</th>\n"; + } + } + + os << indent + s(2) << "</tr>\n"; + + // End header row, start with the data rows... + + MachineInstr *mi = 0; + + // Data rows: + for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); + i = i.getNextSlot()) { + + // Render the slot column. + os << indent + s(2) << "<tr height=6ex>\n"; + + // Render the code column. + if (i.isLoad()) { + MachineBasicBlock *mbb = sis->getMBBFromIndex(i); + mi = sis->getInstructionFromIndex(i); + + if (i == sis->getMBBStartIdx(mbb) || mi != 0 || + ro.renderEmptyIndexes()) { + os << indent + s(4) << "<td rowspan=4>" << i << " </td>\n" + << indent + s(4) << "<td rowspan=4>\n"; + + if (i == sis->getMBBStartIdx(mbb)) { + os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; + } else if (mi != 0) { + os << indent + s(6) << " "; + renderMachineInstr(os, mi); + } else { + // Empty interval - leave blank. + } + os << indent + s(4) << "</td>\n"; + } else { + i = i.getStoreIndex(); // <- Will be incremented to the next index. + continue; + } + } + + // Render the class columns. 
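// --- Illustrative aside (editor's sketch, not part of the patch) ---
// renderCellsWithRLE(), defined above, keeps the generated HTML small:
// rather than one <td> per slot, the caller loops below collapse runs of
// identical cells into a single cell with a colspan. A generic restatement
// of that accumulator pattern, assuming <vector>, <string> and <utility>:
static void emitRunLengthEncodedRow(const std::vector<std::string> &cellClasses,
                                    raw_ostream &os) {
  std::pair<std::string, unsigned> run("", 0u);   // (css class, run length)
  for (unsigned i = 0, e = cellClasses.size(); i <= e; ++i) {
    if (i != e && run.second != 0 && cellClasses[i] == run.first) {
      ++run.second;                               // extend the current run
      continue;
    }
    if (run.second != 0) {                        // flush the previous run
      os << "<td class=\"" << run.first << "\"";
      if (run.second > 1)
        os << " colspan=" << run.second;
      os << "></td>\n";
    }
    if (i != e)
      run = std::make_pair(cellClasses[i], 1u);   // start a new run
  }
}
// --- end aside ---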
+ if (!ro.regClasses().empty()) {
+ std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ PressureState newPressure = getPressureStateAt(trc, i);
+
+ if (newPressure == psRLEAccumulator.first) {
+ ++psRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ psRLEAccumulator.first = newPressure;
+ psRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<td width=2em></td>\n";
+
+ if (!ro.intervals().empty()) {
+ std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+ const LiveInterval *li = *liItr;
+ LiveState newLiveness = getLiveStateAt(li, i);
+
+ if (newLiveness == lsRLEAccumulator.first) {
+ ++lsRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ lsRLEAccumulator.first = newLiveness;
+ lsRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ }
+ os << indent + s(2) << "</tr>\n";
+ }
+
+ os << indent << "</table>\n";
+
+ if (!ro.regClasses().empty())
+ renderPressureTableLegend(indent, os);
+ }
+
+ void RenderMachineFunction::renderFunctionPage(
+ raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << "<html>\n"
+ << s(2) << "<head>\n"
+ << s(4) << "<title>" << fqn << "</title>\n";
+
+ insertCSS(s(4), os);
+
+ os << s(2) << "</head>\n"
+ << s(2) << "<body>\n";
+
+ renderFunctionSummary(s(4), os, renderContextStr);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ //renderLiveIntervalInfoTable(" ", os);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ renderCodeTablePlusPI(s(4), os);
+
+ os << s(2) << "</body>\n"
+ << "</html>\n";
+ }
+
+ void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ lis = &getAnalysis<LiveIntervals>();
+ sis = &getAnalysis<SlotIndexes>();
+
+ trei.setup(mf, mri, tri, lis);
+ ro.setup(mf, tri, lis, this);
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." 
+ + mf->getFunction()->getName().str(); + + return false; + } + + void RenderMachineFunction::releaseMemory() { + trei.clear(); + ro.clear(); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + } + + void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + const MachineInstr *mi = &*rItr; + if (mi->readsRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + } + if (mi->definesRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + } + } + } + + void RenderMachineFunction::rememberSpills( + const LiveInterval *li, + const std::vector<LiveInterval*> &spills) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(), + siEnd = spills.end(); + siItr != siEnd; ++siItr) { + const LiveInterval *spill = *siItr; + spillIntervals[li].insert(spill); + spillFor[spill] = li; + } + } + + bool RenderMachineFunction::isSpill(const LiveInterval *li) const { + SpillForMap::const_iterator sfItr = spillFor.find(li); + if (sfItr == spillFor.end()) + return false; + return true; + } + + void RenderMachineFunction::renderMachineFunction( + const char *renderContextStr, + const VirtRegMap *vrm, + const char *renderSuffix) { + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + this->vrm = vrm; + trei.reset(); + + std::string rpFileName(mf->getFunction()->getName().str() + + (renderSuffix ? renderSuffix : "") + + outputFileSuffix); + + std::string errMsg; + raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); + + renderFunctionPage(outFile, renderContextStr); + + ro.resetRenderSpecificOptions(); + } + + std::string RenderMachineFunction::escapeChars(const std::string &s) const { + return escapeChars(s.begin(), s.end()); + } + +} diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h new file mode 100644 index 0000000000000..8d56a8292ac59 --- /dev/null +++ b/lib/CodeGen/RenderMachineFunction.h @@ -0,0 +1,336 @@ +//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H +#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <algorithm> +#include <map> +#include <set> +#include <string> + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + class MachineInstr; + class MachineRegisterInfo; + class RenderMachineFunction; + class TargetRegisterClass; + class TargetRegisterInfo; + class VirtRegMap; + class raw_ostream; + + /// \brief Helper class to process rendering options. Tries to be as lazy as + /// possible. 
+ class MFRenderingOptions { + public: + + struct RegClassComp { + bool operator()(const TargetRegisterClass *trc1, + const TargetRegisterClass *trc2) const { + std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); + return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), + trc2Name.begin(), trc2Name.end()); + } + }; + + typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet; + + struct IntervalComp { + bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { + return li1->reg < li2->reg; + } + }; + + typedef std::set<const LiveInterval*, IntervalComp> IntervalSet; + + /// Initialise the rendering options. + void setup(MachineFunction *mf, const TargetRegisterInfo *tri, + LiveIntervals *lis, const RenderMachineFunction *rmf); + + /// Clear translations of options to the current function. + void clear(); + + /// Reset any options computed for this specific rendering. + void resetRenderSpecificOptions(); + + /// Should we render the current function. + bool shouldRenderCurrentMachineFunction() const; + + /// Return the set of register classes to render pressure for. + const RegClassSet& regClasses() const; + + /// Return the set of live intervals to render liveness for. + const IntervalSet& intervals() const; + + /// Render indexes which are not associated with instructions / MBB starts. + bool renderEmptyIndexes() const; + + /// Return whether or not to render using SVG for fancy vertical text. + bool fancyVerticals() const; + + private: + + static bool renderingOptionsProcessed; + static std::set<std::string> mfNamesToRender; + static bool renderAllMFs; + + static std::set<std::string> classNamesToRender; + static bool renderAllClasses; + + + static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender; + typedef enum { ExplicitOnly = 0, + AllPhys = 1, + VirtNoSpills = 2, + VirtSpills = 4, + AllVirt = 6, + All = 7 } + IntervalTypesToRender; + static unsigned intervalTypesToRender; + + template <typename OutputItr> + static void splitComaSeperatedList(const std::string &s, OutputItr outItr); + + static void processOptions(); + + static void processFuncNames(); + static void processRegClassNames(); + static void processIntervalNumbers(); + + static void processIntervalRange(const std::string &intervalRangeStr); + + MachineFunction *mf; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + const RenderMachineFunction *rmf; + + mutable bool regClassesTranslatedToCurrentFunction; + mutable RegClassSet regClassSet; + + mutable bool intervalsTranslatedToCurrentFunction; + mutable IntervalSet intervalSet; + + void translateRegClassNamesToCurrentFunction() const; + + void translateIntervalNumbersToCurrentFunction() const; + }; + + /// \brief Provide extra information about the physical and virtual registers + /// in the function being compiled. + class TargetRegisterExtraInfo { + public: + TargetRegisterExtraInfo(); + + /// \brief Set up TargetRegisterExtraInfo with pointers to necessary + /// sources of information. + void setup(MachineFunction *mf, MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, LiveIntervals *lis); + + /// \brief Recompute tables for changed function. + void reset(); + + /// \brief Free all tables in TargetRegisterExtraInfo. + void clear(); + + /// \brief Maximum number of registers from trc which alias reg. + unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; + + /// \brief Returns the number of allocable registers in trc. 
+ unsigned getCapacity(const TargetRegisterClass *trc) const; + + /// \brief Return the number of registers of class trc that may be + /// needed at slot i. + unsigned getPressureAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + /// \brief Return true if the number of registers of type trc that may be + /// needed at slot i is greater than the capacity of trc. + bool classOverCapacityAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + private: + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + + typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine; + typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap; + VRWorstMap vrWorst; + + typedef std::map<unsigned, WorstMapLine> PRWorstMap; + PRWorstMap prWorst; + + typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap; + CapacityMap capacityMap; + + typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine; + typedef std::map<SlotIndex, PressureMapLine> PressureMap; + PressureMap pressureMap; + + bool mapsPopulated; + + /// \brief Initialise the 'worst' table. + void initWorst(); + + /// \brief Initialise the 'capacity' table. + void initCapacity(); + + /// \brief Initialise/Reset the 'pressure' and live states tables. + void resetPressureAndLiveStates(); + }; + + /// \brief Render MachineFunction objects and related information to a HTML + /// page. + class RenderMachineFunction : public MachineFunctionPass { + public: + static char ID; + + RenderMachineFunction() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + void rememberUseDefs(const LiveInterval *li); + + void rememberSpills(const LiveInterval *li, + const std::vector<LiveInterval*> &spills); + + bool isSpill(const LiveInterval *li) const; + + /// \brief Render this machine function to HTML. + /// + /// @param renderContextStr This parameter will be included in the top of + /// the html file to explain where (in the + /// codegen pipeline) this function was rendered + /// from. Set it to something like + /// "Pre-register-allocation". + /// @param vrm If non-null the VRM will be queried to determine + /// whether a virtual register was allocated to a + /// physical register or spilled. + /// @param renderFilePrefix This string will be appended to the function + /// name (before the output file suffix) to enable + /// multiple renderings from the same function. + void renderMachineFunction(const char *renderContextStr, + const VirtRegMap *vrm = 0, + const char *renderSuffix = 0); + + private: + class Spacer; + friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); + + std::string fqn; + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + SlotIndexes *sis; + const VirtRegMap *vrm; + + TargetRegisterExtraInfo trei; + MFRenderingOptions ro; + + + + // Utilities. 
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; + LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; + + typedef enum { Zero, Low, High } PressureState; + PressureState getPressureStateAt(const TargetRegisterClass *trc, + SlotIndex i) const; + + typedef std::map<const LiveInterval*, std::set<const LiveInterval*> > + SpillIntervals; + SpillIntervals spillIntervals; + + typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap; + SpillForMap spillFor; + + typedef std::set<SlotIndex> SlotSet; + typedef std::map<const LiveInterval*, SlotSet> UseDefs; + UseDefs useDefs; + + // ---------- Rendering methods ---------- + + /// For inserting spaces when pretty printing. + class Spacer { + public: + explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} + Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } + void print(raw_ostream &os) const; + private: + unsigned ns; + }; + + Spacer s(unsigned ns) const; + + template <typename Iterator> + std::string escapeChars(Iterator sBegin, Iterator sEnd) const; + + /// \brief Render a machine instruction. + void renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const; + + /// \brief Render vertical text. + template <typename T> + void renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const; + + /// \brief Insert CSS layout info. + void insertCSS(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a brief summary of the function (including rendering + /// context). + void renderFunctionSummary(const Spacer &indent, + raw_ostream &os, + const char * const renderContextStr) const; + + /// \brief Render a legend for the pressure table. + void renderPressureTableLegend(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a consecutive set of HTML cells of the same class using + /// the colspan attribute for run-length encoding. + template <typename CellType> + void renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair<CellType, unsigned> &rleAccumulator, + const std::map<CellType, std::string> &cellTypeStrs) const; + + /// \brief Render code listing, potentially with register pressure + /// and live intervals shown alongside. + void renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render the HTML page representing the MachineFunction. + void renderFunctionPage(raw_ostream &os, + const char * const renderContextStr) const; + + std::string escapeChars(const std::string &s) const; + }; +} + +#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 09202f84cb29d..ea93dd5c66633 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -32,7 +32,8 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) { + : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()), + Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { MFI = mf.getFrameInfo(); DbgValueVec.clear(); } @@ -159,8 +160,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; // Keep track of dangling debug references to registers. 
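// Editorial aside, not part of the patch: the constructor change above and the
// hunk below share one pattern -- a C array sized by the compile-time constant
// TargetRegisterInfo::FirstVirtualRegister becomes a std::vector sized from
// the target's actual register count, which is also what makes dropping the
// later memset safe, because vector elements are value-initialized. A sketch
// of the pattern with invented names:

#include <utility>
#include <vector>

class MachineInstr; // stand-in forward declaration

typedef std::pair<MachineInstr *, unsigned> DanglingEntry;

std::vector<DanglingEntry> makeDanglingTable(unsigned numRegs) {
  // Every slot starts out as (null instruction, 0); no memset required,
  // unlike the fixed-size array this replaces.
  return std::vector<DanglingEntry>(
      numRegs, std::make_pair(static_cast<MachineInstr *>(0), 0));
}

// The actual change follows.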
- std::pair<MachineInstr*, unsigned> - DanglingDebugValue[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<std::pair<MachineInstr*, unsigned> > + DanglingDebugValue(TRI->getNumRegs(), + std::make_pair(static_cast<MachineInstr*>(0), 0)); // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -172,7 +174,6 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValueVec.clear(); - std::memset(DanglingDebugValue, 0, sizeof(DanglingDebugValue)); // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index d90659bb163ef..c8f543f7146dd 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -106,8 +106,8 @@ namespace llvm { /// are as we iterate upward through the instructions. This is allocated /// here instead of inside BuildSchedGraph to avoid the need for it to be /// initialized and destructed for each block. - std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister]; - std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister]; + std::vector<std::vector<SUnit *> > Defs; + std::vector<std::vector<SUnit *> > Uses; /// DbgValueVec - Remember DBG_VALUEs that refer to a particular /// register. diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e671752464572..c9c4d91e97361 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4489,6 +4489,16 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If this is a conversion of N elements of one type to N elements of another // type, convert each element. This handles FP<->INT cases. if (SrcBitSize == DstBitSize) { + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + BV->getValueType(0).getVectorNumElements()); + + // Due to the FP element handling below calling this routine recursively, + // we can end up with a scalar-to-vector node here. 
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + DstEltVT, BV->getOperand(0))); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { SDValue Op = BV->getOperand(i); @@ -4500,8 +4510,6 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5790,7 +5798,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); SDValue N0 = Value.getOperand(0); - if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + Chain == SDValue(N0.getNode(), 1)) { LoadSDNode *LD = cast<LoadSDNode>(N0); if (LD->getBasePtr() != Ptr) return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index decaa769e99fe..a4eed71e65c01 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -94,7 +94,7 @@ bool FastISel::hasTrivialKill(const Value *V) const { !(I->getOpcode() == Instruction::BitCast || I->getOpcode() == Instruction::PtrToInt || I->getOpcode() == Instruction::IntToPtr) && - cast<Instruction>(I->use_begin())->getParent() == I->getParent(); + cast<Instruction>(*I->use_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { @@ -146,7 +146,7 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -/// materializeRegForValue - Helper for getRegForVale. This function is +/// materializeRegForValue - Helper for getRegForValue. This function is /// called when the value isn't already available in a register and must /// be materialized with new instructions. unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { @@ -276,6 +276,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { void FastISel::recomputeInsertPt() { if (getLastLocalValue()) { FuncInfo.InsertPt = getLastLocalValue(); + FuncInfo.MBB = FuncInfo.InsertPt->getParent(); ++FuncInfo.InsertPt; } else FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); @@ -472,17 +473,7 @@ bool FastISel::SelectCall(const User *I) { return true; const AllocaInst *AI = dyn_cast<AllocaInst>(Address); // Don't handle byval struct arguments or VLAs, for example. - // Note that if we have a byval struct argument, fast ISel is turned off; - // those are handled in SelectionDAGBuilder. - if (AI) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs. - int FI = SI->second; - if (!DI->getDebugLoc().isUnknown()) - FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(), - FI, DI->getDebugLoc()); - } else + if (!AI) // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. 
(void)TargetSelectInstruction(cast<Instruction>(I)); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 928e1ecd4cf4e..5ef6404ee5d6b 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -111,17 +112,56 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + // The object may need to be placed onto the stack near the stack + // protector if one exists. Determine here if this object is a suitable + // candidate. I.e., it would trigger the creation of a stack protector. + bool MayNeedSP = + (AI->isArrayAllocation() || + (TySize > 8 && isa<ArrayType>(Ty) && + cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Mark values used outside their block as exported, by allocating + // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) InitializeRegForValue(I); + // Collect llvm.dbg.declare information. This is done now instead of + // during the initial isel pass through the IR so that it is done + // in a predictable order. + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + MachineModuleInfo &MMI = MF->getMMI(); + if (MMI.hasDebugInfo() && + DIVariable(DI->getVariable()).Verify() && + !DI->getDebugLoc().isUnknown()) { + // Don't handle byval struct arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack + // temporary alloca at this point). + const Value *Address = DI->getAddress(); + if (Address) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + DenseMap<const AllocaInst *, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI != StaticAllocaMap.end()) { // Check for VLAs. + int FI = SI->second; + MMI.setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); + } + } + } + } + } + } + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. @@ -181,6 +221,7 @@ void FunctionLoweringInfo::clear() { #endif LiveOutRegInfo.clear(); ArgDbgValues.clear(); + ByValArgFrameIndexMap.clear(); RegFixups.clear(); } @@ -214,6 +255,28 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { return FirstReg; } +/// setByValArgumentFrameIndex - Record frame index for the byval +/// argument. This overrides previous frame index entry for this argument, +/// if any. 
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A, + int FI) { + assert (A->hasByValAttr() && "Argument does not have byval attribute!"); + ByValArgFrameIndexMap[A] = FI; +} + +/// getByValArgumentFrameIndex - Get frame index for the byval argument. +/// If the argument does not have any assigned frame index then 0 is +/// returned. +int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) { + assert (A->hasByValAttr() && "Argument does not have byval attribute!"); + DenseMap<const Argument *, int>::iterator I = + ByValArgFrameIndexMap.find(A); + if (I != ByValArgFrameIndexMap.end()) + return I->second; + DEBUG(dbgs() << "Argument does not have assigned frame index!"); + return 0; +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7a47da4ec52ef..2981cd3f1cabd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -100,8 +100,7 @@ public: /// it is already legal or we need to expand it into multiple registers of /// smaller integer type, or we need to promote it to a larger type. LegalizeAction getTypeAction(EVT VT) const { - return - (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT); + return (LegalizeAction)ValueTypeActions.getTypeAction(VT); } /// isTypeLegal - Return true if this type is legal on this target. @@ -1314,21 +1313,30 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case TargetLowering::Expand: - // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND - // f128 = EXTLOAD {f32,f64} too - if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 || - Node->getValueType(0) == MVT::f128)) || - (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Result = DAG.getNode(ISD::FP_EXTEND, dl, - Node->getValueType(0), Load); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. Tmp2 = LegalizeOp(Load.getValue(1)); break; } + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. 
assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b94ea9a3a9afb..f8c5890719219 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -234,8 +234,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), - JoinIntegers(N->getOperand(0), N->getOperand(1))); + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), JoinIntegers(N->getOperand(0), + N->getOperand(1))); } SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { @@ -245,7 +246,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT), + SDValue Result = DAG.getNode(Opc, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), VT), SDValue(N, 0)); assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); return Result; @@ -310,8 +312,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT - // and SINT conversions are Custom, there is no way to tell which is preferable. - // We choose SINT because that's the right thing on PPC.) + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) if (N->getOpcode() == ISD::FP_TO_UINT && !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) @@ -1030,7 +1032,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. 
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); SDValue LoOps[2] = { InL, InL }; @@ -1926,7 +1928,8 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); } } @@ -2046,7 +2049,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getZeroExtendInReg(Hi, dl, - EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bd86694446d6a..d56029208e61a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -75,7 +75,7 @@ private: /// getTypeAction - Return how we should legalize values of this type. LegalizeAction getTypeAction(EVT VT) const { - switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) { + switch (ValueTypeActions.getTypeAction(VT)) { default: assert(false && "Unknown legalize action!"); case TargetLowering::Legal: @@ -86,8 +86,7 @@ private: // 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32). if (!VT.isVector()) return PromoteInteger; - else - return WidenVector; + return WidenVector; case TargetLowering::Expand: // Expand can mean // 1) split scalar in half, 2) convert a float to an integer, @@ -95,23 +94,21 @@ private: if (!VT.isVector()) { if (VT.isInteger()) return ExpandInteger; - else if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) + if (VT.getSizeInBits() == + TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) return SoftenFloat; - else - return ExpandFloat; - } else if (VT.getVectorNumElements() == 1) { - return ScalarizeVector; - } else { - return SplitVector; + return ExpandFloat; } + + if (VT.getVectorNumElements() == 1) + return ScalarizeVector; + return SplitVector; } } /// isTypeLegal - Return true if this type is legal on this target. bool isTypeLegal(EVT VT) const { - return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) == - TargetLowering::Legal); + return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; } /// IgnoreNodeResults - Pretend all of this node's results are legal. 
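The getTypeAction cleanup just above flattens nested else-chains into early returns without changing the decision ladder. As a rough standalone sketch of that ladder (simplified: the real code consults ValueTypeActions and TLI, while here the type's properties arrive as plain flags; only the enum names follow the hunk):

enum LegalizeAction {
  Legal, PromoteInteger, ExpandInteger, SoftenFloat,
  ExpandFloat, ScalarizeVector, SplitVector, WidenVector
};

// Hypothetical stand-in for DAGTypeLegalizer::getTypeAction.
LegalizeAction classify(bool targetSaysPromote, bool targetSaysExpand,
                        bool isVector, bool isInteger,
                        bool sameSizeTransform, unsigned numElts) {
  if (!targetSaysPromote && !targetSaysExpand)
    return Legal;
  if (targetSaysPromote)
    // Promote widens scalars (e.g. i8 -> i32) or vectors (v3i32 -> v4i32).
    return isVector ? WidenVector : PromoteInteger;
  // Expand: split an integer in half, convert a float to a same-sized
  // integer (soften) or a smaller representation, or break up a vector.
  if (!isVector) {
    if (isInteger)
      return ExpandInteger;
    return sameSizeTransform ? SoftenFloat : ExpandFloat;
  }
  return numElts == 1 ? ScalarizeVector : SplitVector;
}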
@@ -584,6 +581,7 @@ private:
 SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
 SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
 SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
 //===--------------------------------------------------------------------===//
 // Vector Widening Support: LegalizeVectorTypes.cpp
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93aeff5c1e6cf..93bc2d04928e7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
 case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
 case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
 case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
 case ISD::STORE:
 Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
 break;
@@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
 return SDValue(DAG.UpdateNodeOperands(N, Hi,
 DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType())),
- 0);
+ Idx.getValueType())), 0);
 }
 // Store the vector to the stack.
@@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 assert(N->isUnindexed() && "Indexed store of vector?");
 assert(OpNo == 1 && "Can only split the stored value");
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
 bool isTruncating = N->isTruncatingStore();
 SDValue Ch = N->getChain();
@@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
 if (isTruncating)
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
 LoMemVT, isVol, isNT, Alignment);
 else
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
 isVol, isNT, Alignment);
 // Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
 DAG.getIntPtrConstant(IncrementSize));
 SVOffset += IncrementSize;
 if (isTruncating)
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
 HiMemVT, isVol, isNT, Alignment);
 else
- Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
 isVol, isNT, Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands must all have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide. 
+ SmallVector<SDValue, 32> Elts; + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { + SDValue Op = N->getOperand(op); + for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); + i != e; ++i) { + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Op, DAG.getIntPtrConstant(i))); + + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), + &Elts[0], Elts.size()); } @@ -1274,8 +1298,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { - NumElts = NumElts / 2; - VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { @@ -1283,124 +1307,123 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); - } else if (NumElts == 1) { - // No legal vector version so unroll the vector operation and then widen. + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); - } else { - // Since the operation can trap, apply operation on the original vector. - EVT MaxVT = VT; - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); - - SmallVector<SDValue, 16> ConcatOps(CurNumElts); - unsigned ConcatEnd = 0; // Current ConcatOps index. - int Idx = 0; // Current Idx into input vectors. - - // NumElts := greatest synthesizable vector size (at most WidenVT) - // while (orig. vector has unhandled elements) { - // take munches of size NumElts from the beginning and add to ConcatOps - // NumElts := next smaller supported vector size or 1 - // } - while (CurNumElts != 0) { - while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getIntPtrConstant(Idx)); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); - Idx += NumElts; - CurNumElts -= NumElts; - } - do { - NumElts = NumElts / 2; - VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); - - if (NumElts == 1) { - for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getIntPtrConstant(Idx)); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); - } - CurNumElts = 0; + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. 
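// Editorial aside, not part of the patch: the loop described by the pseudocode
// just below carves the original element count into progressively smaller
// power-of-two "munches". A self-contained illustration of the arithmetic
// (hypothetical sizes; the real code asks TLI.isTypeSynthesizable before
// settling on each munch size):

#include <iostream>

int main() {
  unsigned CurNumElts = 7; // elements still unhandled
  unsigned NumElts = 4;    // widest chunk assumed synthesizable
  while (CurNumElts != 0) {
    while (CurNumElts >= NumElts) {
      std::cout << "emit op on " << NumElts << " elements\n";
      CurNumElts -= NumElts;
    }
    if (NumElts > 1)
      NumElts /= 2;        // fall back to the next smaller vector size
  }
  // Prints chunks of 4, 2 and 1; the pieces are concatenated back into the
  // widened vector type afterwards, exactly as the loop below does.
  return 0;
}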
+ + // NumElts := greatest synthesizable vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); } + CurNumElts = 0; } + } - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } - // while (Some element of ConcatOps is not of type MaxVT) { - // From the end of ConcatOps, collect elements of the same type and put - // them into an op of the next larger supported type - // } - while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { - Idx = ConcatEnd - 1; - VT = ConcatOps[Idx--].getValueType(); - while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) - Idx--; - - int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1; - EVT NextVT; - do { - NextSize *= 2; - NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeSynthesizable(NextVT)); - - if (!VT.isVector()) { - // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT - SDValue VecOp = DAG.getUNDEF(NextVT); - unsigned NumToInsert = ConcatEnd - Idx - 1; - for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); - } - ConcatOps[Idx+1] = VecOp; - ConcatEnd = Idx + 2; - } - else { - // Vector type, create a CONCAT_VECTORS of type NextVT - SDValue undefVec = DAG.getUNDEF(VT); - unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); - SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); - unsigned RealVals = ConcatEnd - Idx - 1; - unsigned SubConcatEnd = 0; - unsigned SubConcatIdx = Idx + 1; - while (SubConcatEnd < RealVals) - SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; - while (SubConcatEnd < OpsToConcat) - SubConcatOps[SubConcatEnd++] = undefVec; - ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, &SubConcatOps[0], - OpsToConcat); - ConcatEnd = SubConcatIdx + 1; + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeSynthesizable(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); } + ConcatOps[Idx+1] = VecOp; + ConcatEnd = Idx + 2; + } else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, &SubConcatOps[0], + OpsToConcat); + ConcatEnd = SubConcatIdx + 1; } + } - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } - - // add undefs of size MaxVT until ConcatOps grows to length of WidenVT - unsigned NumOps = - WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); - if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(MaxVT); - for (unsigned j = ConcatEnd; j < NumOps; ++j) - ConcatOps[j] = UndefVal; - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + // Check to see if we have a single operation with the widen type. 
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; } + + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(MaxVT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1561,8 +1584,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); - NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, - WidenSize / InEltVT.getSizeInBits()); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); } else { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } @@ -1686,8 +1709,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); @@ -1720,9 +1742,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SmallVector<SDValue, 16> Ops(NumConcat); Ops[0] = InOp; SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) { + for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - } + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); @@ -2225,25 +2247,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Check if we can load the element with one instruction if (LdWidth <= NewVTWidth) { - if (NewVT.isVector()) { - if (NewVT != WidenVT) { - assert(WidenWidth % NewVTWidth == 0); - unsigned NumConcat = WidenWidth / NewVTWidth; - SmallVector<SDValue, 16> ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(NewVT); - ConcatOps[0] = LdOp; - for (unsigned i = 1; i != NumConcat; ++i) - ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], - NumConcat); - } else - return LdOp; - } else { + if (!NewVT.isVector()) { unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); } + if (NewVT == WidenVT) + return LdOp; + + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector<SDValue, 16> ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], + NumConcat); } // Load vector by using multiple loads from largest vector to scalar @@ -2276,52 +2297,55 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Build the vector from the loads operations unsigned End = LdOps.size(); - if (LdOps[0].getValueType().isVector()) { - // If the load contains vectors, build the vector using concat vector. 
- // All of the vectors used to loads are power of 2 and the scalars load - // can be combined to make a power of 2 vector. - SmallVector<SDValue, 16> ConcatOps(End); - int i = End - 1; - int Idx = End; - EVT LdTy = LdOps[i].getValueType(); - // First combine the scalar loads to a vector - if (!LdTy.isVector()) { - for (--i; i >= 0; --i) { - LdTy = LdOps[i].getValueType(); - if (LdTy.isVector()) - break; - } - ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); - } - ConcatOps[--Idx] = LdOps[i]; + if (!LdOps[0].getValueType().isVector()) + // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + + // If the load contains vectors, build the vector using concat vector. + // All of the vectors used to loads are power of 2 and the scalars load + // can be combined to make a power of 2 vector. + SmallVector<SDValue, 16> ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { for (--i; i >= 0; --i) { - EVT NewLdTy = LdOps[i].getValueType(); - if (NewLdTy != LdTy) { - // Create a larger vector - ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - &ConcatOps[Idx], End - Idx); - Idx = End - 1; - LdTy = NewLdTy; - } - ConcatOps[--Idx] = LdOps[i]; + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + &ConcatOps[Idx], End - Idx); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } - if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) { - // We need to fill the rest with undefs to build the vector - unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); - SmallVector<SDValue, 16> WidenOps(NumOps); - SDValue UndefVal = DAG.getUNDEF(LdTy); - unsigned i = 0; - for (; i != End-Idx; ++i) - WidenOps[i] = ConcatOps[Idx+i]; - for (; i != NumOps; ++i) - WidenOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); - } else - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - &ConcatOps[Idx], End - Idx); - } else // All the loads are scalar loads. 
- return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + &ConcatOps[Idx], End - Idx); + + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector<SDValue, 16> WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + { + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); } SDValue diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 3b86c3286585f..fae27294e3646 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -432,6 +433,30 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, return N->getValueType(NumRes); } +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -446,37 +471,44 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isAssignedRegDep()) { - unsigned Reg = I->getReg(); - if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { - if (RegAdded.insert(Reg)) - LRegs.push_back(Reg); - } - for (const unsigned *Alias = TRI->getAliasSet(Reg); - *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { - if (RegAdded.insert(*Alias)) - LRegs.push_back(*Alias); - } + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); } } for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { + // Check for def of register or earlyclobber register. 
+ for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } if (!Node->isMachineOpcode()) continue; const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); if (!TID.ImplicitDefs) continue; for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { - if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { - if (RegAdded.insert(*Reg)) - LRegs.push_back(*Reg); - } - for (const unsigned *Alias = TRI->getAliasSet(*Reg); - *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { - if (RegAdded.insert(*Alias)) - LRegs.push_back(*Alias); - } + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } return !LRegs.empty(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 3ef521c398e11..4c3e4e3b07680 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -54,10 +55,16 @@ static RegisterScheduler static RegisterScheduler hybridListDAGScheduler("list-hybrid", - "Bottom-up rr list scheduling which avoid stalls for " - "long latency instructions", + "Bottom-up register pressure aware list scheduling " + "which tries to balance latency and register pressure", createHybridListDAGScheduler); +static RegisterScheduler + ILPListDAGScheduler("list-ilp", + "Bottom-up register pressure aware list scheduling " + "which tries to balance ILP and register pressure", + createILPListDAGScheduler); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -181,7 +188,9 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -273,6 +282,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); + AvailableQueue->ScheduledNode(SU); + ReleasePredecessors(SU, CurCycle); // Release all the implicit physical register defs that are live. @@ -291,7 +302,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { } SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); } /// CapturePred - This does the opposite of ReleasePred. 
Since SU is being @@ -315,8 +325,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); - AvailableQueue->UnscheduledNode(SU); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); @@ -346,6 +354,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SU->isScheduled = false; SU->isAvailable = true; AvailableQueue->push(SU); + AvailableQueue->UnscheduledNode(SU); } /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in @@ -956,7 +965,8 @@ namespace { template<class SF> class RegReductionPriorityQueue; - /// Sorting functions for the Available queue. + /// bu_ls_rr_sort - Priority function for bottom up register pressure + // reduction scheduler. struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} @@ -965,6 +975,8 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + // td_ls_rr_sort - Priority function for top down register pressure reduction + // scheduler. struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} @@ -973,6 +985,7 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) @@ -983,13 +996,26 @@ namespace { bool operator()(const SUnit* left, const SUnit* right) const; }; + // hybrid_ls_rr_sort - Priority function for hybrid scheduler. struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) : SPQ(spq) {} hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - + + bool operator()(const SUnit* left, const SUnit* right) const; + }; + + // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) + // scheduler. + struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; + ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + bool operator()(const SUnit* left, const SUnit* right) const; }; } // end anonymous namespace @@ -1029,23 +1055,48 @@ namespace { std::vector<SUnit*> Queue; SF Picker; unsigned CurQueueId; + bool TracksRegPressure; protected: // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; - + + MachineFunction &MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const TargetLowering *TLI; ScheduleDAGRRList *scheduleDAG; // SethiUllmanNumbers - The SethiUllman number for each node. std::vector<unsigned> SethiUllmanNumbers; + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
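+    /// Both vectors are indexed by register class ID; RegLimit is seeded
+    /// from TLI->getRegPressureLimit() in the constructor below.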
+ std::vector<unsigned> RegLimit; + public: - RegReductionPriorityQueue(const TargetInstrInfo *tii, - const TargetRegisterInfo *tri) - : Picker(this), CurQueueId(0), - TII(tii), TRI(tri), scheduleDAG(NULL) {} + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + } + } void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; @@ -1072,6 +1123,7 @@ namespace { void releaseState() { SUnits = 0; SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); } unsigned getNodePriority(const SUnit *SU) const { @@ -1139,10 +1191,244 @@ namespace { SU->NodeQueueId = 0; } + bool HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; // Reg pressure already high. + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if (!PN->hasAnyUseOfValue(i)) + continue; + // Check if this increases register pressure of the specific register + // class to the point where it would cause spills. 
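+        // e.g. with a limit of 8 registers, pressure 7 and a cost of 2
+        // gives 7 + 2 >= 8, so the node is treated as high pressure.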
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + } + + return false; + } + + void ScheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. 
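+          // Clamp at zero rather than letting the unsigned subtraction
+          // wrap around to a huge pressure value.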
+ RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); + } + + void UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. 
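+    // Re-add the pressure for the values this node defines, undoing the
+    // decrement that ScheduledNode performed when SU was scheduled.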
+ if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Flag || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); + } + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { scheduleDAG = scheduleDag; } + void dumpRegPressure() const { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] + << '\n'); + } + } + protected: bool canClobber(const SUnit *SU, const SUnit *Op); void AddPseudoTwoAddrDeps(); @@ -1161,6 +1447,9 @@ namespace { typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> HybridBURRPriorityQueue; + + typedef RegReductionPriorityQueue<ilp_ls_rr_sort> + ILPBURRPriorityQueue; } /// closestSucc - Returns the scheduled cycle of the successor which is @@ -1260,30 +1549,63 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ - bool LStall = left->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < left->getHeight(); - bool RStall = right->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < right->getHeight(); - // If scheduling one of the node will cause a pipeline stall, delay it. - // If scheduling either one of the node will cause a pipeline stall, sort them - // according to their height. - // If neither will cause a pipeline stall, try to reduce register pressure. - if (LStall) { - if (!RStall) - return true; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - } else if (RStall) + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + // Avoid causing spills. If register pressure is high, schedule for + // register pressure reduction. + if (LHigh && !RHigh) + return true; + else if (!LHigh && RHigh) + return false; + else if (!LHigh && !RHigh) { + // Low register pressure situation, schedule for latency if possible. + bool LStall = left->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < left->getHeight(); + bool RStall = right->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < right->getHeight(); + // If scheduling one of the node will cause a pipeline stall, delay it. + // If scheduling either one of the node will cause a pipeline stall, sort + // them according to their height. + // If neither will cause a pipeline stall, try to reduce register pressure. + if (LStall) { + if (!RStall) + return true; + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + } else if (RStall) return false; - // If either node is scheduling for latency, sort them by height and latency - // first. - if (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency) { - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - if (left->Latency != right->Latency) - return left->Latency > right->Latency; + // If either node is scheduling for latency, sort them by height and latency + // first. 
+    if (left->SchedulingPref == Sched::Latency ||
+        right->SchedulingPref == Sched::Latency) {
+      if (left->getHeight() != right->getHeight())
+        return left->getHeight() > right->getHeight();
+      if (left->Latency != right->Latency)
+        return left->Latency > right->Latency;
+    }
+  }
+
+  return BURRSort(left, right, SPQ);
+}
+
+bool ilp_ls_rr_sort::operator()(const SUnit *left,
+                                const SUnit *right) const {
+  bool LHigh = SPQ->HighRegPressure(left);
+  bool RHigh = SPQ->HighRegPressure(right);
+  // Avoid causing spills. If register pressure is high, schedule for
+  // register pressure reduction.
+  if (LHigh && !RHigh)
+    return true;
+  else if (!LHigh && RHigh)
+    return false;
+  else if (!LHigh && !RHigh) {
+    // Low register pressure situation, schedule to maximize instruction level
+    // parallelism.
+    if (left->NumPreds > right->NumPreds)
+      return false;
+    else if (left->NumPreds < right->NumPreds)
+      return true;
   }
 
   return BURRSort(left, right, SPQ);
@@ -1635,8 +1957,8 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 
-  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
-
+  BURegReductionPriorityQueue *PQ =
+    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
@@ -1648,8 +1970,8 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 
-  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
-
+  TDRegReductionPriorityQueue *PQ =
+    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
@@ -1661,8 +1983,8 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
 
-  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
-
+  SrcRegReductionPriorityQueue *PQ =
+    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
@@ -1673,9 +1995,24 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetMachine &TM = IS->TM;
   const TargetInstrInfo *TII = TM.getInstrInfo();
   const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  const TargetLowering *TLI = &IS->getTargetLowering();
 
-  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+  HybridBURRPriorityQueue *PQ =
+    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+  PQ->setScheduleDAG(SD);
+  return SD;
+}
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+  const TargetMachine &TM = IS->TM;
+  const TargetInstrInfo *TII = TM.getInstrInfo();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  const TargetLowering *TLI = &IS->getTargetLowering();
+
+  ILPBURRPriorityQueue *PQ =
+    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
   PQ->setScheduleDAG(SD);
   return SD;
diff --git
a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 06cf053087550..f1bf82ab145a4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -59,8 +59,9 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - if (N->isMachineOpcode() && - N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + if (!N || + (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) SU->SchedulingPref = Sched::None; else SU->SchedulingPref = TLI.getSchedulingPreference(N); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e83a0346b5351..ad06ebda5b007 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2236,7 +2236,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (FiniteOnlyFPMath()) + if (NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. @@ -2281,35 +2281,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { } -/// getShuffleScalarElt - Returns the scalar element that will make up the ith -/// element of the result of the vector shuffle. -SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, - unsigned i) { - EVT VT = N->getValueType(0); - if (N->getMaskElt(i) < 0) - return getUNDEF(VT.getVectorElementType()); - unsigned Index = N->getMaskElt(i); - unsigned NumElems = VT.getVectorNumElements(); - SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); - Index %= NumElems; - - if (V.getOpcode() == ISD::BIT_CONVERT) { - V = V.getOperand(0); - EVT VVT = V.getValueType(); - if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) - return SDValue(); - } - if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) - return (Index == 0) ? V.getOperand(0) - : getUNDEF(VT.getVectorElementType()); - if (V.getOpcode() == ISD::BUILD_VECTOR) - return V.getOperand(Index); - if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V)) - return getShuffleScalarElt(SVN, Index); - return SDValue(); -} - - /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { @@ -2624,7 +2595,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // one big BUILD_VECTOR. 
if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } @@ -3021,7 +2993,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR && N3.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); + SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), + N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); @@ -5872,6 +5845,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() { void SDNode::dump() const { dump(0); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); + dbgs() << '\n'; } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { @@ -5895,7 +5869,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { OS << **i; - if (next(i) != e) + if (llvm::next(i) != e) OS << " "; } OS << ">"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 458e865a6b3c8..e65744592c8bb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -70,22 +70,29 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT); + /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { // Assemble the value from multiple parts. 
- if (!ValueVT.isVector() && ValueVT.isInteger()) { + if (ValueVT.isInteger()) { unsigned PartBits = PartVT.getSizeInBits(); unsigned ValueBits = ValueVT.getSizeInBits(); @@ -100,25 +107,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { - Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2, + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT); - Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2, + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); } if (TLI.isBigEndian()) std::swap(Lo, Hi); - Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); - Hi = getCopyFromParts(DAG, dl, + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT, OddVT); // Combine the round and odd parts. @@ -126,68 +133,29 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (TLI.isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); - Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), TLI.getPointerTy())); - Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo); - Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi); - } - } else if (ValueVT.isVector()) { - // Handle a multi-element vector. - EVT IntermediateVT, RegisterVT; - unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); - assert(NumRegs == NumParts - && "Part count doesn't match vector breakdown!"); - NumParts = NumRegs; // Silence a compiler warning. - assert(RegisterVT == PartVT - && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getValueType() && - "Part type doesn't match part!"); - - // Assemble the parts into intermediate operands. - SmallVector<SDValue, 8> Ops(NumIntermediates); - if (NumIntermediates == NumParts) { - // If the register was not expanded, truncate or copy the value, - // as appropriate. - for (unsigned i = 0; i != NumParts; ++i) - Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, - PartVT, IntermediateVT); - } else if (NumParts > 0) { - // If the intermediate type was expanded, build the intermediate - // operands from the parts. - assert(NumParts % NumIntermediates == 0 && - "Must expand into a divisible number of parts!"); - unsigned Factor = NumParts / NumIntermediates; - for (unsigned i = 0; i != NumIntermediates; ++i) - Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, - PartVT, IntermediateVT); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } - - // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the - // intermediate operands. - Val = DAG.getNode(IntermediateVT.isVector() ? 
-                     ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
-                     ValueVT, &Ops[0], NumIntermediates);
   } else if (PartVT.isFloatingPoint()) {
     // FP split into multiple FP parts (for ppcf128)
     assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
            "Unexpected split");
     SDValue Lo, Hi;
-    Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
-    Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+    Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
+    Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
     if (TLI.isBigEndian())
       std::swap(Lo, Hi);
-    Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+    Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
   } else {
     // FP split into integer parts (soft fp)
     assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
            !PartVT.isVector() && "Unexpected split");
     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
-    Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+    Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
   }
 }
@@ -197,219 +165,315 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
   if (PartVT == ValueVT)
     return Val;
 
-  if (PartVT.isVector()) {
-    assert(ValueVT.isVector() && "Unknown vector conversion!");
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
-  }
-
-  if (ValueVT.isVector()) {
-    assert(ValueVT.getVectorElementType() == PartVT &&
-           ValueVT.getVectorNumElements() == 1 &&
-           "Only trivial scalar-to-vector conversions should get here!");
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
-  }
-
-  if (PartVT.isInteger() &&
-      ValueVT.isInteger()) {
+  if (PartVT.isInteger() && ValueVT.isInteger()) {
     if (ValueVT.bitsLT(PartVT)) {
       // For a truncate, see if we have any information to
       // indicate whether the truncated bits will always be
       // zero or sign-extension.
       if (AssertOp != ISD::DELETED_NODE)
-        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+        Val = DAG.getNode(AssertOp, DL, PartVT, Val,
                           DAG.getValueType(ValueVT));
-      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
-    } else {
-      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
     }
+    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
   }
 
   if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
-    if (ValueVT.bitsLT(Val.getValueType())) {
-      // FP_ROUND's are always exact here.
-      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+    // FP_ROUND's are always exact here.
+    if (ValueVT.bitsLT(Val.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
                          DAG.getIntPtrConstant(1));
-    }
-    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
   }
 
   if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+    return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
 
   llvm_unreachable("Unknown mismatch!");
   return SDValue();
 }
 
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent.  This handles the vector
+/// case: the parts are assembled with BUILD_VECTOR or CONCAT_VECTORS and the
+/// result is then corrected to match ValueVT.
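+/// For example, four i32 parts may be built into a single v4i32, or two
+/// v2f32 parts concatenated into a v4f32.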
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT) { + assert(ValueVT.isVector() && "Not a vector value"); + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + // Handle a multi-element vector. + if (NumParts > 1) { + EVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? + ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, + ValueVT, &Ops[0], NumIntermediates); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. + return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + } + + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + + + + +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT); + /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, +static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PtrVT = TLI.getPointerTy(); EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); - if (!NumParts) + if (NumParts == 0) return; - if (!ValueVT.isVector()) { - if (PartVT == ValueVT) { - assert(NumParts == 1 && "No-op copy with multiple parts!"); - Parts[0] = Val; - return; - } - - if (NumParts * PartBits > ValueVT.getSizeInBits()) { - // If the parts cover more bits than the value has, promote the value. - if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { - assert(NumParts == 1 && "Do not know what to promote to!"); - Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); - } else if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); - } else { - llvm_unreachable("Unknown mismatch!"); - } - } else if (PartBits == ValueVT.getSizeInBits()) { - // Different types of the same size. - assert(NumParts == 1 && PartVT != ValueVT); - Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); - } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { - // If the parts cover less bits than value has, truncate the value. - if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); - } else { - llvm_unreachable("Unknown mismatch!"); - } - } - - // The value may have changed - recompute ValueVT. - ValueVT = Val.getValueType(); - assert(NumParts * PartBits == ValueVT.getSizeInBits() && - "Failed to tile the value with PartVT!"); - - if (NumParts == 1) { - assert(PartVT == ValueVT && "Type conversion failed!"); - Parts[0] = Val; - return; - } + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } - // Expand the value into multiple parts. - if (NumParts & (NumParts - 1)) { - // The number of parts is not a power of 2. Split off and copy the tail. + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { assert(PartVT.isInteger() && ValueVT.isInteger() && - "Do not know what to expand to!"); - unsigned RoundParts = 1 << Log2_32(NumParts); - unsigned RoundBits = RoundParts * PartBits; - unsigned OddParts = NumParts - RoundParts; - SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, - DAG.getConstant(RoundBits, - TLI.getPointerTy())); - getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, - OddParts, PartVT); - - if (TLI.isBigEndian()) - // The odd parts were reversed by getCopyToParts - unreverse them. 
- std::reverse(Parts + RoundParts, Parts + NumParts); - - NumParts = RoundParts; + "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); - Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); - // The number of parts is a power of 2. Repeatedly bisect the value using - // EXTRACT_ELEMENT. - Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, - EVT::getIntegerVT(*DAG.getContext(), - ValueVT.getSizeInBits()), - Val); - - for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { - for (unsigned i = 0; i < NumParts; i += StepSize) { - unsigned ThisBits = StepSize * PartBits / 2; - EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); - SDValue &Part0 = Parts[i]; - SDValue &Part1 = Parts[i+StepSize/2]; - - Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - ThisVT, Part0, - DAG.getConstant(1, PtrVT)); - Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, - ThisVT, Part0, - DAG.getConstant(0, PtrVT)); - - if (ThisBits == PartBits && ThisVT != PartVT) { - Part0 = DAG.getNode(ISD::BIT_CONVERT, dl, - PartVT, Part0); - Part1 = DAG.getNode(ISD::BIT_CONVERT, dl, - PartVT, Part1); - } + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. 
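+  // e.g. an i64 value copied into four i16 parts is first bisected into two
+  // i32 halves, then each half into two i16 quarters.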
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); } } + } - if (TLI.isBigEndian()) - std::reverse(Parts, Parts + OrigNumParts); + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} - return; - } - // Vector ValueVT. +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (NumParts == 1) { - if (PartVT != ValueVT) { - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { - Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val); - } else { - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - PartVT, Val, - DAG.getConstant(0, PtrVT)); - } - } + if (PartVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else { + // Vector -> scalar conversion. + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); + } + Parts[0] = Val; return; } - + // Handle a multi-element vector. 
EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, NumIntermediates, RegisterVT); + IntermediateVT, + NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); - + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), - PtrVT)); + DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); else - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - IntermediateVT, Val, - DAG.getConstant(i, PtrVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + IntermediateVT, Val, DAG.getIntPtrConstant(i)); } - + // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -417,10 +481,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); } } + + + namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information @@ -460,11 +527,6 @@ namespace { EVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - RegsForValue(const SmallVector<unsigned, 4> ®s, - const SmallVector<EVT, 4> ®vts, - const SmallVector<EVT, 4> &valuevts) - : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) { ComputeValueVTs(tli, Ty, ValueVTs); @@ -530,6 +592,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, DebugLoc dl, SDValue &Chain, SDValue *Flag) const { + // A Value with type {} or [0 x %t] needs no registers. + if (ValueVTs.empty()) + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Assemble the legal parts into the final values. 
@@ -623,8 +689,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); Part += NumParts; } @@ -701,6 +766,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } @@ -805,6 +871,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. +void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + MDNode *Variable = DI->getVariable(); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + SDV = DAG.getDbgValue(Variable, Val.getNode(), + Val.getResNo(), Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else { + SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), + Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + } + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + // getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { // If we already have an SDValue for this value, use it. It's important @@ -826,6 +919,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); return Val; } @@ -839,10 +933,11 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); return Val; } -/// getValueImpl - Helper function for getValue and getMaterializedValue. +/// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast<Constant>(V)) { @@ -986,10 +1081,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); - EVT PtrVT = PtrValueVTs[0]; for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, - DAG.getConstant(Offsets[i], PtrVT)); + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i])); Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -2709,11 +2804,6 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } Ty = StTy->getElementType(Field); - } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) { - unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); - - // Offset canonically 0 for unions, but type changes - Ty = UnTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); @@ -2818,7 +2908,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3824,11 +3914,11 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. bool -SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, - const Value *V, MDNode *Variable, - uint64_t Offset, +SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + int64_t Offset, const SDValue &N) { - if (!isa<Argument>(V)) + const Argument *Arg = dyn_cast<Argument>(V); + if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); @@ -3842,7 +3932,15 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, return false; unsigned Reg = 0; - if (N.getOpcode() == ISD::CopyFromReg) { + if (Arg->hasByValAttr()) { + // Byval arguments' frame index is recorded during argument lowering. + // Use this info directly. + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + Reg = TRI->getFrameRegister(MF); + Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + } + + if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -3966,42 +4064,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - if (!DIVariable(DI.getVariable()).Verify()) - return 0; - MDNode *Variable = DI.getVariable(); - // Parameters are handled specially. - bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; const Value *Address = DI.getAddress(); - if (!Address) + if (!Address || !DIVariable(DI.getVariable()).Verify()) return 0; - if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) - Address = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast<AllocaInst>(Address); - if (AI) { - // Don't handle byval arguments or VLAs, for example. 
-      // Non-byval arguments are handled here (they refer to the stack temporary
-      // alloca at this point).
-      DenseMap<const AllocaInst*, int>::iterator SI =
-        FuncInfo.StaticAllocaMap.find(AI);
-      if (SI == FuncInfo.StaticAllocaMap.end())
-        return 0; // VLAs.
-      int FI = SI->second;
-
-      MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
-      if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
-        MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
-    }
 
     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
     // but do not always have a corresponding SDNode built.  The SDNodeOrder
     // absolute, but not relative, values are different depending on whether
    // debug info exists.
     ++SDNodeOrder;
+
+    // Check if address has undef value.
+    if (isa<UndefValue>(Address) ||
+        (Address->use_empty() && !isa<Argument>(Address))) {
+      SDDbgValue *SDV =
+        DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+                        0, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, 0, false);
+      return 0;
+    }
+
     SDValue &N = NodeMap[Address];
+    if (!N.getNode() && isa<Argument>(Address))
+      // Check unused arguments map.
+      N = UnusedArgNodeMap[Address];
     SDDbgValue *SDV;
     if (N.getNode()) {
+      // Parameters are handled specially.
+      bool isParameter =
+        DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+        Address = BCI->getOperand(0);
+      const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
       if (isParameter && !AI) {
         FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
         if (FINode)
@@ -4020,10 +4116,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         return 0;
       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
     } else {
-      // This isn't useful, but it shows what we're missing.
-      SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
-                            0, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, 0, isParameter);
+      // If Address is an argument then try to emit its dbg value using
+      // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
+      // to help track missing debug info.
+      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+        SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+                              0, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, 0, false);
+      }
     }
     return 0;
   }
@@ -4048,31 +4148,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, 0, false);
     } else {
-      bool createUndef = false;
-      // FIXME : Why not use getValue() directly ?
+      // Do not use getValue() in here; we don't want to generate code at
+      // this point if it hasn't been done yet.
      SDValue N = NodeMap[V];
       if (!N.getNode() && isa<Argument>(V))
         // Check unused arguments map.
N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else if (isa<PHINode>(V) && !V->use_empty()) { - SDValue N = getValue(V); - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { - SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else - createUndef = true; - } else - createUndef = true; - if (createUndef) { + } else if (isa<PHINode>(V) && !V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter; we need this fallback. SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), @@ -4572,6 +4665,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) isTailCall = false; + // If there's a possibility that fast-isel has already selected some amount + // of the current basic block, don't emit a tail call. + if (isTailCall && EnableFastISel) + isTailCall = false; + std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(getRoot(), RetTy, CS.paramHasAttr(0, Attribute::SExt), @@ -6054,6 +6152,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } + // Note down frame index for byval arguments. + if (I->hasByValAttr() && !ArgValues.empty()) + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); + if (!I->use_empty()) { SDValue Res; if (!ArgValues.empty()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 46733d6db1241..5f400e9c83acb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,9 +18,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" @@ -64,6 +61,7 @@ class PHINode; class PtrToIntInst; class ReturnInst; class SDISelAsmOperandInfo; +class SDDbgValue; class SExtInst; class SelectInst; class ShuffleVectorInst; @@ -93,6 +91,24 @@ class SelectionDAGBuilder { /// to preserve debug information for incoming arguments. DenseMap<const Value*, SDValue> UnusedArgNodeMap; + /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap. + class DanglingDebugInfo { + const DbgValueInst* DI; + DebugLoc dl; + unsigned SDNodeOrder; + public: + DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : + DI(di), dl(DL), SDNodeOrder(SDNO) { } + const DbgValueInst* getDI() { return DI; } + DebugLoc getdl() { return dl; } + unsigned getSDNodeOrder() { return SDNodeOrder; } + }; + + /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not + /// yet seen the referent. We defer handling these until we do see it. 
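+  /// A typical case is a dbg_value of a PHI whose SDValue has not been
+  /// created yet; resolveDanglingDebugInfo() emits the deferred SDDbgValue
+  /// and clears the entry once getValue() sees the definition.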
+  DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
public:
  /// PendingLoads - Loads are not emitted to the program immediately. We bunch
  /// them up and then emit token factor nodes when possible. This allows us to
@@ -345,6 +361,9 @@ public:
  void visit(unsigned Opcode, const User &I);
+  // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+  // generate the debug data structures now that we've seen its definition.
+  void resolveDanglingDebugInfo(const Value *V, SDValue Val);
  SDValue getValue(const Value *V);
  SDValue getNonRegisterValue(const Value *V);
  SDValue getValueImpl(const Value *V);
@@ -506,13 +525,11 @@ private:
  void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
-  /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a
-  /// function argument, create the corresponding DBG_VALUE machine instruction
-  /// for it now. At the end of instruction selection, they will be inserted to
-  /// the entry BB.
-  bool EmitFuncArgumentDbgValue(const DbgValueInst &DI,
-                                const Value *V, MDNode *Variable,
-                                uint64_t Offset, const SDValue &N);
+  /// EmitFuncArgumentDbgValue - If V is a function argument then create the
+  /// corresponding DBG_VALUE machine instruction for it now. At the end of
+  /// instruction selection, they will be inserted to the entry BB.
+  bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+                                int64_t Offset, const SDValue &N);
};
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 08ba5482f7d22..66cb5ceb09e53 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -132,14 +132,16 @@ namespace llvm {
    const TargetLowering &TLI = IS->getTargetLowering();
    if (OptLevel == CodeGenOpt::None)
-      return createFastDAGScheduler(IS, OptLevel);
+      return createSourceListDAGScheduler(IS, OptLevel);
    if (TLI.getSchedulingPreference() == Sched::Latency)
      return createTDListDAGScheduler(IS, OptLevel);
    if (TLI.getSchedulingPreference() == Sched::RegPressure)
      return createBURRListDAGScheduler(IS, OptLevel);
-    assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+    if (TLI.getSchedulingPreference() == Sched::Hybrid)
+      return createHybridListDAGScheduler(IS, OptLevel);
+    assert(TLI.getSchedulingPreference() == Sched::ILP &&
           "Unknown sched type!");
-    return createHybridListDAGScheduler(IS, OptLevel);
+    return createILPListDAGScheduler(IS, OptLevel);
  }
}
@@ -169,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
-  MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+  MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
  FuncInfo(new FunctionLoweringInfo(TLI)),
  CurDAG(new SelectionDAG(tm)),
  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
@@ -216,7 +218,7 @@ static bool FunctionCallsSetJmp(const Function *F) {
      for (Value::const_use_iterator
             I = Callee->use_begin(), E = Callee->use_end(); I != E; ++I)
-        if (const CallInst *CI = dyn_cast<CallInst>(I))
+        if (const CallInst *CI = dyn_cast<CallInst>(*I))
          if (CI->getParent()->getParent() == F)
            return true;
    }
@@ -362,38 +364,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
  CodeGenAndEmitDAG();
}
-namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
-/// nodes from the
worklist. -class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener { - SmallVector<SDNode*, 128> &Worklist; - SmallPtrSet<SDNode*, 128> &InWorklist; -public: - SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl, - SmallPtrSet<SDNode*, 128> &inwl) - : Worklist(wl), InWorklist(inwl) {} - - void RemoveFromWorklist(SDNode *N) { - if (!InWorklist.erase(N)) return; - - SmallVector<SDNode*, 128>::iterator I = - std::find(Worklist.begin(), Worklist.end(), N); - assert(I != Worklist.end() && "Not in worklist"); - - *I = Worklist.back(); - Worklist.pop_back(); - } - - virtual void NodeDeleted(SDNode *N, SDNode *E) { - RemoveFromWorklist(N); - } - - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } -}; -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet<SDNode*, 128> VisitedNodes; SmallVector<SDNode*, 128> Worklist; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6cae804422ce6..8313de5e32bba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -199,7 +199,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { #else errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" << " on systems with Graphviz or gv!\n"; - return std::string(""); + return std::string(); #endif } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4f3866956cac5..b74f600cfa2db 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -651,6 +651,53 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, return NumVectorRegs; } +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. +bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// hasLegalSuperRegRegClasses - Return true if the specified register class +/// has one or more super-reg register classes that are legal. +bool +TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{ + if (*RC->superregclasses_begin() == 0) + return false; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (isLegalRC(RRC)) + return true; + } + return false; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". +std::pair<const TargetRegisterClass*, uint8_t> +TargetLowering::findRepresentativeClass(EVT VT) const { + const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; + if (!RC) + return std::make_pair(RC, 0); + const TargetRegisterClass *BestRC = RC; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (RRC->isASubClass() || !isLegalRC(RRC)) + continue; + if (!hasLegalSuperRegRegClasses(RRC)) + return std::make_pair(RRC, 1); + BestRC = RRC; + } + return std::make_pair(BestRC, 1); +} + + /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. 
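The isLegalRC / hasLegalSuperRegRegClasses / findRepresentativeClass trio above walks a register class's super-class chain and settles on the largest legal class. Roughly, the search looks like this standalone sketch (illustrative types, not the TargetRegisterInfo API; sub-class filtering and the cost byte from the real code are omitted):

#include <vector>

struct RegClass {
  bool legal;                                 // models isLegalRC()
  std::vector<const RegClass*> superClasses;  // ordered smaller -> larger
};

// Keep climbing while a legal super-class exists; the last legal class on
// the chain is the representative for this group of value types.
const RegClass *representative(const RegClass *RC) {
  const RegClass *best = RC;
  for (size_t i = 0; i != RC->superClasses.size(); ++i)
    if (RC->superClasses[i]->legal)
      best = RC->superClasses[i];
  return best;
}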
void TargetLowering::computeRegisterProperties() {
@@ -736,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
    MVT VT = (MVT::SimpleValueType)i;
    if (isTypeLegal(VT)) continue;
+    // Determine if there is a legal wider type. If so, we should promote to
+    // that wider vector type.
+    EVT EltVT = VT.getVectorElementType();
+    unsigned NElts = VT.getVectorNumElements();
+    if (NElts != 1) {
+      bool IsLegalWiderType = false;
+      for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+        EVT SVT = (MVT::SimpleValueType)nVT;
+        if (SVT.getVectorElementType() == EltVT &&
+            SVT.getVectorNumElements() > NElts &&
+            isTypeSynthesizable(SVT)) {
+          TransformToType[i] = SVT;
+          RegisterTypeForVT[i] = SVT;
+          NumRegistersForVT[i] = 1;
+          ValueTypeActions.setTypeAction(VT, Promote);
+          IsLegalWiderType = true;
+          break;
+        }
+      }
+      if (IsLegalWiderType) continue;
+    }
+
    MVT IntermediateVT;
    EVT RegisterVT;
    unsigned NumIntermediates;
@@ -744,32 +813,29 @@ void TargetLowering::computeRegisterProperties() {
                                  RegisterVT, this);
    RegisterTypeForVT[i] = RegisterVT;
-    // Determine if there is a legal wider type.
-    bool IsLegalWiderType = false;
-    EVT EltVT = VT.getVectorElementType();
-    unsigned NElts = VT.getVectorNumElements();
-    for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-      EVT SVT = (MVT::SimpleValueType)nVT;
-      if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
-          SVT.getVectorNumElements() > NElts && NElts != 1) {
-        TransformToType[i] = SVT;
-        ValueTypeActions.setTypeAction(VT, Promote);
-        IsLegalWiderType = true;
-        break;
-      }
-    }
-    if (!IsLegalWiderType) {
-      EVT NVT = VT.getPow2VectorType();
-      if (NVT == VT) {
-        // Type is already a power of 2. The default action is to split.
-        TransformToType[i] = MVT::Other;
-        ValueTypeActions.setTypeAction(VT, Expand);
-      } else {
-        TransformToType[i] = NVT;
-        ValueTypeActions.setTypeAction(VT, Promote);
-      }
+    EVT NVT = VT.getPow2VectorType();
+    if (NVT == VT) {
+      // Type is already a power of 2. The default action is to split.
+      TransformToType[i] = MVT::Other;
+      ValueTypeActions.setTypeAction(VT, Expand);
+    } else {
+      TransformToType[i] = NVT;
+      ValueTypeActions.setTypeAction(VT, Promote);
    }
  }
+
+  // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest (meaning one which is
+  // not a sub-register class) legal register class for a group of value
+  // types. For example, on i386 the representative class for i8, i16, and
+  // i32 would be GR32; on x86_64 it is GR64.
+  for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+    const TargetRegisterClass* RRC;
+    uint8_t Cost;
+    tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+    RepRegClassForVT[i] = RRC;
+    RepRegClassCostForVT[i] = Cost;
+  }
}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -798,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                EVT &RegisterVT) const {
-  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
+
+  // If there is a wider vector type with the same element type as this one,
+  // we should widen to that legal vector type. This handles things like
+  // <2 x float> -> <4 x float>.
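The promotion scan that this comment describes can be read in isolation: among strictly wider vector types with the same element type, take the first legal one; if none exists, fall back to power-of-2 widening or splitting. A standalone sketch of the scan (simplified types, not MVT/EVT):

#include <vector>

struct VecTy { unsigned eltBits; unsigned numElts; bool legal; };

// Returns a pointer to the first legal strictly-wider type with the same
// element type, or 0 if the caller must split/widen by other means.
// E.g. <2 x float> would map to <4 x float> when the latter is legal.
const VecTy *widerLegalType(const VecTy &VT,
                            const std::vector<VecTy> &candidates) {
  for (size_t i = 0; i != candidates.size(); ++i) {
    const VecTy &SVT = candidates[i];
    if (SVT.eltBits == VT.eltBits && SVT.numElts > VT.numElts && SVT.legal)
      return &candidates[i];
  }
  return 0;
}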
+ if (NumElts != 1 && getTypeAction(VT) == Promote) { + RegisterVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterVT)) { + IntermediateVT = RegisterVT; + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; @@ -828,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; - if (DestVT.bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// Get the EVTs and ArgFlags collections that represent the legalized return @@ -1308,9 +1383,32 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + if ((APInt::getHighBitsSet(BitWidth, + BitWidth - InnerVT.getSizeInBits()) & + DemandedMask) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, ShTy)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + } + KnownZero <<= SA->getZExtValue(); KnownOne <<= SA->getZExtValue(); // low bits known zero. @@ -1415,11 +1513,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()) & - NewMask; + BitWidth - EVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. - if (NewBits == 0) + if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); @@ -1886,12 +1983,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT ExtDstTy = N0.getValueType(); unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); - // If the extended part has any inconsistent bits, it cannot ever - // compare equal. In other words, they have to be all ones or all - // zeros. - APInt ExtBits = - APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); - if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + // If the constant doesn't fit into the number of bits for the source of + // the sign extension, it is impossible for both sides to be equal. 
+ if (C1.getMinSignedBits() > ExtSrcTyBits) return DAG.getConstant(Cond == ISD::SETNE, VT); SDValue ZextOp; @@ -2476,7 +2570,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), - C->getDebugLoc(), + C ? C->getDebugLoc() : DebugLoc(), Op.getValueType(), Offs)); return; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index e69d3e4fa78aa..b29ea19835bc9 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -59,13 +59,16 @@ DisableCrossClassJoin("disable-cross-class-join", cl::desc("Avoid coalescing cross register class copies"), cl::init(false), cl::Hidden); -static RegisterPass<SimpleRegisterCoalescing> -X("simple-register-coalescing", "Simple Register Coalescing"); +static cl::opt<bool> +DisablePhysicalJoin("disable-physical-join", + cl::desc("Avoid coalescing physical register copies"), + cl::init(false), cl::Hidden); -// Declare that we implement the RegisterCoalescer interface -static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X); +INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer, + "simple-register-coalescing", "Simple Register Coalescing", + false, false, true); -const PassInfo *const llvm::SimpleRegisterCoalescingID = &X; +char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -386,16 +389,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; - bool BHasSubRegs = false; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - BHasSubRegs = *tri_->getSubRegisters(IntB.reg); - - // Abort if the subregisters of IntB.reg have values that are not simply the + // Abort if the aliases of IntB.reg have values that are not simply the // clobbers from the superreg. - if (BHasSubRegs) - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && - HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && + HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) return false; // If some of the uses of IntA.reg is already coalesced away, return false. @@ -412,6 +411,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; } + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI); + // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. MachineBasicBlock *MBB = DefMI->getParent(); @@ -470,16 +471,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (Extended) UseMO.setIsKill(false); } - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (UseMI->isCopy()) { - if (UseMI->getOperand(0).getReg() != IntB.reg || - UseMI->getOperand(0).getSubReg()) - continue; - } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ - if (DstReg != IntB.reg || DstSubIdx) - continue; - } else + if (!UseMI->isCopy()) continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + // This copy will become a noop. 
If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. @@ -504,13 +501,13 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Remove val#'s defined by copies that will be coalesced away. for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { VNInfo *DeadVNI = BDeadValNos[i]; - if (BHasSubRegs) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) continue; - LiveInterval &SRLI = li_->getInterval(*SR); - if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def)) - SRLI.removeValNo(SRLR->valno); + LiveInterval &ASLI = li_->getInterval(*AS); + if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def)) + ASLI.removeValNo(ASLR->valno); } } IntB.removeValNo(BDeadValNos[i]); @@ -628,14 +625,6 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) DefMO.setIsDead(); } - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - DstReg == li.reg && DstSubIdx == 0) { - // Last use is itself an identity code. - int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, - false, false, tri_); - LastUseMI->getOperand(DeadIdx).setIsDead(); - } return true; } @@ -772,16 +761,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { // A PhysReg copy that won't be coalesced can perhaps be rematerialized // instead. if (DstIsPhys) { - unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, - CopySrcSubIdx, CopyDstSubIdx) && - CopySrcSubIdx == 0 && CopyDstSubIdx == 0 && - CopySrcReg != CopyDstReg && CopySrcReg == SrcReg && - CopyDstReg != DstReg && !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0, - UseMI)) - continue; - if (UseMI->isCopy() && !UseMI->getOperand(1).getSubReg() && !UseMI->getOperand(0).getSubReg() && @@ -834,28 +813,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { dbgs() << li_->getInstructionIndex(UseMI) << "\t"; dbgs() << *UseMI; }); - - - // After updating the operand, check if the machine instruction has - // become a copy. If so, update its val# information. - const TargetInstrDesc &TID = UseMI->getDesc(); - if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2) - continue; - - unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, - CopySrcSubIdx, CopyDstSubIdx) && - CopySrcReg != CopyDstReg && - (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || - allocatableRegs_[CopyDstReg])) { - LiveInterval &LI = li_->getInterval(CopyDstReg); - SlotIndex DefIdx = - li_->getInstructionIndex(UseMI).getDefIndex(); - if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) { - if (DLR->valno->def == DefIdx) - DLR->valno->setCopy(UseMI); - } - } } } @@ -1082,13 +1039,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. 
} + if (DisablePhysicalJoin && CP.isPhys()) { + DEBUG(dbgs() << "\tPhysical joins disabled.\n"); + return false; + } + DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); // Enforce policies. if (CP.isPhys()) { DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); // Only coalesce to allocatable physreg. - if (!allocatableRegs_[CP.getDstReg()]) { + if (!li_->isAllocatable(CP.getDstReg())) { DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1137,7 +1099,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // happens. if (li_->hasInterval(CP.getDstReg()) && li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); ++numAborts; DEBUG(dbgs() << "\tPhysical register live interval too complicated, abort!\n"); @@ -1156,7 +1117,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) return true; - mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); ++numAborts; DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. @@ -1543,21 +1503,19 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, MachineInstr *Inst = MII++; // If this isn't a copy nor a extract_subreg, we can't join intervals. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - bool isInsUndef = false; + unsigned SrcReg, DstReg; if (Inst->isCopy()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); } else if (Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); - } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + } else continue; bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (isInsUndef || - (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())) + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) ImpDefCopies.push_back(CopyRec(Inst, 0)); else if (SrcIsPhys || DstIsPhys) PhysCopies.push_back(CopyRec(Inst, 0)); @@ -1679,11 +1637,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, MachineInstr *UseMI = Use.getParent(); if (UseMI->isIdentityCopy()) continue; - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg && SrcSubIdx == DstSubIdx) - // Ignore identity copies. - continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something // that compares higher than any other interval. @@ -1708,10 +1661,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, return NULL; // Ignore identity copies. 
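The deleted lines below are the old isMoveInstr-based identity test; after this patch only MI->isIdentityCopy() remains. The underlying predicate is simple enough to state standalone (illustrative struct, not MachineInstr):

struct CopyInstr { unsigned dstReg, srcReg, dstSub, srcSub; };

// An identity copy moves a register (and sub-index) onto itself; it carries
// no data, so the coalescer skips it rather than joining intervals.
bool isIdentityCopy(const CopyInstr &C) {
  return C.dstReg == C.srcReg && C.dstSub == C.srcSub;
}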
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!MI->isIdentityCopy() && - !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg && SrcSubIdx == DstSubIdx)) + if (!MI->isIdentityCopy()) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); if (Use.isReg() && Use.isUse() && Use.getReg() && @@ -1747,7 +1697,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); - allocatableRegs_ = tri_->getAllocatableSet(fn); for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), E = tri_->regclass_end(); I != E; ++I) allocatableRCRegs_.insert(std::make_pair(*I, @@ -1775,30 +1724,35 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ) { MachineInstr *MI = mii; - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (JoinedCopies.count(MI)) { // Delete all coalesced copies. bool DoDelete = true; - if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - } + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + MI->getNumOperands() > 2) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 + // or else the scavenger may complain. LowerSubregs will + // delete them later. + DoDelete = false; + if (MI->allDefsAreDead()) { LiveInterval &li = li_->getInterval(SrcReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); DoDelete = true; } - if (!DoDelete) + if (!DoDelete) { + // We need the instruction to adjust liveness, so make it a KILL. 
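The hunk that follows demotes a copy that must stay (for liveness) into a KILL: for SUBREG_TO_REG it first drops the sub-register index and the immediate so only the destination and source registers remain, then swaps the opcode. The shape of that rewrite, modeled standalone (hypothetical Instr type, not MachineInstr):

#include <string>
#include <vector>

struct Operand { std::string reg; };
struct Instr { std::string opcode; std::vector<Operand> ops; };

// Models the KILL conversion: strip SUBREG_TO_REG down to "dst, src" by
// removing operand 3 (subreg index) then operand 1 (immediate), and retag.
void demoteToKill(Instr &MI, bool isSubregToReg) {
  if (isSubregToReg && MI.ops.size() >= 4) {
    MI.ops.erase(MI.ops.begin() + 3);
    MI.ops.erase(MI.ops.begin() + 1);
  }
  MI.opcode = "KILL";
}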
+ if (MI->isSubregToReg()) { + MI->RemoveOperand(3); + MI->RemoveOperand(1); + } + MI->setDesc(tii_->get(TargetOpcode::KILL)); mii = llvm::next(mii); - else { + } else { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; @@ -1840,9 +1794,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } // If the move will be an identity move delete it - bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (MI->isIdentityCopy() || - (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) { + if (MI->isIdentityCopy()) { + unsigned SrcReg = MI->getOperand(1).getReg(); if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index e154da60affa2..855bdb98b36c6 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -47,7 +47,6 @@ namespace llvm { const MachineLoopInfo* loopInfo; AliasAnalysis *AA; - BitVector allocatableRegs_; DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_; /// JoinedCopies - Keep track of copies eliminated due to coalescing. @@ -64,7 +63,7 @@ namespace llvm { public: static char ID; // Pass identifcation, replacement for typeid - SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {} + SimpleRegisterCoalescing() : MachineFunctionPass(ID) {} struct InstrSlots { enum { diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index e90869d600dd6..b637980f885c3 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -58,7 +58,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit SjLjEHPass(const TargetLowering *tli = NULL) - : FunctionPass(&ID), TLI(tli) { } + : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 7a227cf02d57d..1bc148f160bc8 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -40,7 +40,8 @@ namespace { } char SlotIndexes::ID = 0; -static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering"); +INITIALIZE_PASS(SlotIndexes, "slotindexes", + "Slot index numbering", false, false); IndexListEntry* IndexListEntry::getEmptyKeyEntry() { return &*IndexListEntryEmptyKey; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 56bcb2824ae8a..59d5ab33c994f 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -49,29 +50,31 @@ namespace { /// Utility class for spillers. class SpillerBase : public Spiller { protected: + MachineFunctionPass *pass; MachineFunction *mf; + VirtRegMap *vrm; LiveIntervals *lis; MachineFrameInfo *mfi; MachineRegisterInfo *mri; const TargetInstrInfo *tii; const TargetRegisterInfo *tri; - VirtRegMap *vrm; /// Construct a spiller base. 
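The constructor change below switches the spillers from taking each analysis by hand to taking the owning pass and querying it, so call sites stay stable as spillers grow new requirements. A standalone model of that dependency-pull style (toy Pass type, not MachineFunctionPass):

#include <cassert>

struct LiveIntervals { /* analysis results */ };

struct Pass {
  LiveIntervals *lis;                      // set up by the pass manager
  LiveIntervals &getLiveIntervals() { assert(lis); return *lis; }
};

// Mirrors the new SpillerBase: keep the pass, and fetch analyses from it
// in the constructor instead of threading them through every caller.
struct SpillerBase {
  Pass *pass;
  LiveIntervals *lis;
  explicit SpillerBase(Pass &p) : pass(&p), lis(&p.getLiveIntervals()) {}
};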
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) - : mf(mf), lis(lis), vrm(vrm) + SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) + : pass(&pass), mf(&mf), vrm(&vrm) { - mfi = mf->getFrameInfo(); - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); + lis = &pass.getAnalysis<LiveIntervals>(); + mfi = mf.getFrameInfo(); + mri = &mf.getRegInfo(); + tii = mf.getTarget().getInstrInfo(); + tri = mf.getTarget().getRegisterInfo(); } /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. void trivialSpillEverywhere(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals) { + SmallVectorImpl<LiveInterval*> &newIntervals) { DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -173,13 +176,13 @@ namespace { class TrivialSpiller : public SpillerBase { public: - TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) - : SpillerBase(mf, lis, vrm) {} + TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : SpillerBase(pass, mf, vrm) {} void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &, - SlotIndex*) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &) { // Ignore spillIs - we don't use it. trivialSpillEverywhere(li, newIntervals); } @@ -193,18 +196,19 @@ namespace { class StandardSpiller : public Spiller { protected: LiveIntervals *lis; - const MachineLoopInfo *loopInfo; + MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: - StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo, - VirtRegMap *vrm) - : lis(lis), loopInfo(loopInfo), vrm(vrm) {} + StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : lis(&pass.getAnalysis<LiveIntervals>()), + loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), + vrm(&vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex*) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { std::vector<LiveInterval*> added = lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); newIntervals.insert(newIntervals.end(), added.begin(), added.end()); @@ -221,23 +225,21 @@ namespace { /// then the spiller falls back on the standard spilling mechanism. 
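For contrast with the splitting path, the trivialSpillEverywhere strategy shown earlier is the bluntest policy: every use reloads from the stack slot just before the instruction and every def stores back just after, with no folding or rematerialization. A standalone sketch of that placement rule (toy types, illustrative only):

#include <cstdio>
#include <vector>

struct Access { unsigned index; bool isDef; };

// Emit one reload per use and one store per def against a single slot.
void spillEverywhere(const std::vector<Access> &accesses, int slot) {
  for (size_t i = 0; i != accesses.size(); ++i) {
    const Access &A = accesses[i];
    if (A.isDef)
      std::printf("%u: store to slot %d after def\n", A.index, slot);
    else
      std::printf("%u: reload from slot %d before use\n", A.index, slot);
  }
}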
class SplittingSpiller : public StandardSpiller { public: - SplittingSpiller(MachineFunction *mf, LiveIntervals *lis, - const MachineLoopInfo *loopInfo, VirtRegMap *vrm) - : StandardSpiller(lis, loopInfo, vrm) { - - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); + SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : StandardSpiller(pass, mf, vrm) { + mri = &mf.getRegInfo(); + tii = mf.getTarget().getInstrInfo(); + tri = mf.getTarget().getRegisterInfo(); } void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestStart) { + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) { if (worthTryingToSplit(li)) - tryVNISplit(li, earliestStart); + tryVNISplit(li); else - StandardSpiller::spill(li, newIntervals, spillIs, earliestStart); + StandardSpiller::spill(li, newIntervals, spillIs); } private: @@ -252,8 +254,7 @@ private: } /// Try to break a LiveInterval into its component values. - std::vector<LiveInterval*> tryVNISplit(LiveInterval *li, - SlotIndex *earliestStart) { + std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) { DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n"); @@ -277,10 +278,6 @@ private: DEBUG(dbgs() << *splitInterval << "\n"); added.push_back(splitInterval); alreadySplit.insert(splitInterval); - if (earliestStart != 0) { - if (splitInterval->beginIndex() < *earliestStart) - *earliestStart = splitInterval->beginIndex(); - } } else { DEBUG(dbgs() << "0\n"); } @@ -293,10 +290,6 @@ private: if (!li->empty()) { added.push_back(li); alreadySplit.insert(li); - if (earliestStart != 0) { - if (li->beginIndex() < *earliestStart) - *earliestStart = li->beginIndex(); - } } return added; @@ -506,20 +499,19 @@ private: namespace llvm { -Spiller *createInlineSpiller(MachineFunction*, - LiveIntervals*, - const MachineLoopInfo*, - VirtRegMap*); +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); } -llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, - const MachineLoopInfo *loopInfo, - VirtRegMap *vrm) { +llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { switch (spillerOpt) { default: assert(0 && "unknown spiller"); - case trivial: return new TrivialSpiller(mf, lis, vrm); - case standard: return new StandardSpiller(lis, loopInfo, vrm); - case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); - case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm); + case trivial: return new TrivialSpiller(pass, mf, vrm); + case standard: return new StandardSpiller(pass, mf, vrm); + case splitting: return new SplittingSpiller(pass, mf, vrm); + case inline_: return createInlineSpiller(pass, mf, vrm); } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 450447b3933a8..59bc0ec6ae70f 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -11,19 +11,14 @@ #define LLVM_CODEGEN_SPILLER_H #include "llvm/ADT/SmallVector.h" -#include <vector> namespace llvm { class LiveInterval; - class LiveIntervals; - class LiveStacks; class MachineFunction; - class MachineInstr; - class MachineLoopInfo; + class MachineFunctionPass; class SlotIndex; class VirtRegMap; - class VNInfo; /// Spiller interface. 
/// @@ -40,18 +35,16 @@ namespace llvm { /// @param spillIs A list of intervals that are about to be spilled, /// and so cannot be used for remat etc. /// @param newIntervals The newly created intervals will be appended here. - /// @param earliestIndex The earliest point for splitting. (OK, it's another - /// pointer to the allocator guts). virtual void spill(LiveInterval *li, - std::vector<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs, - SlotIndex *earliestIndex = 0) = 0; + SmallVectorImpl<LiveInterval*> &newIntervals, + SmallVectorImpl<LiveInterval*> &spillIs) = 0; }; /// Create and return a spiller object, as specified on the command line. - Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li, - const MachineLoopInfo *loopInfo, VirtRegMap *vrm); + Spiller* createSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); } #endif diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp new file mode 100644 index 0000000000000..29474f0d55121 --- /dev/null +++ b/lib/CodeGen/SplitKit.cpp @@ -0,0 +1,1097 @@ +//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "splitter" +#include "SplitKit.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> +AllowSplit("spiller-splits-edges", + cl::desc("Allow critical edge splitting during spilling")); + +//===----------------------------------------------------------------------===// +// Split Analysis +//===----------------------------------------------------------------------===// + +SplitAnalysis::SplitAnalysis(const MachineFunction &mf, + const LiveIntervals &lis, + const MachineLoopInfo &mli) + : mf_(mf), + lis_(lis), + loops_(mli), + tii_(*mf.getTarget().getInstrInfo()), + curli_(0) {} + +void SplitAnalysis::clear() { + usingInstrs_.clear(); + usingBlocks_.clear(); + usingLoops_.clear(); + curli_ = 0; +} + +bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { + MachineBasicBlock *T, *F; + SmallVector<MachineOperand, 4> Cond; + return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); +} + +/// analyzeUses - Count instructions, basic blocks, and loops using curli. 
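analyzeUses below counts at three granularities with two short-circuits: an instruction already seen is skipped, and a block only bumps its loop the first time the block is seen. That counting discipline, standalone (string keys instead of MBB/loop pointers; not the real API):

#include <map>
#include <set>
#include <string>

std::set<const void*> usingInstrs;              // models usingInstrs_
std::map<std::string, unsigned> usingBlocks;    // block -> use count
std::map<std::string, unsigned> usingLoops;     // loop  -> block count

void countUse(const void *instr, const std::string &block,
              const std::string &loop /* empty if none */) {
  if (!usingInstrs.insert(instr).second)
    return;                 // instruction already counted
  if (usingBlocks[block]++)
    return;                 // block already credited to its loop
  if (!loop.empty())
    usingLoops[loop]++;
}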
+void SplitAnalysis::analyzeUses() { + const MachineRegisterInfo &MRI = mf_.getRegInfo(); + for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg); + MachineInstr *MI = I.skipInstruction();) { + if (MI->isDebugValue() || !usingInstrs_.insert(MI)) + continue; + MachineBasicBlock *MBB = MI->getParent(); + if (usingBlocks_[MBB]++) + continue; + if (MachineLoop *Loop = loops_.getLoopFor(MBB)) + usingLoops_[Loop]++; + } + DEBUG(dbgs() << " counted " + << usingInstrs_.size() << " instrs, " + << usingBlocks_.size() << " blocks, " + << usingLoops_.size() << " loops.\n"); +} + +/// removeUse - Update statistics by noting that MI no longer uses curli. +void SplitAnalysis::removeUse(const MachineInstr *MI) { + if (!usingInstrs_.erase(MI)) + return; + + // Decrement MBB count. + const MachineBasicBlock *MBB = MI->getParent(); + BlockCountMap::iterator bi = usingBlocks_.find(MBB); + assert(bi != usingBlocks_.end() && "MBB missing"); + assert(bi->second && "0 count in map"); + if (--bi->second) + return; + // No more uses in MBB. + usingBlocks_.erase(bi); + + // Decrement loop count. + MachineLoop *Loop = loops_.getLoopFor(MBB); + if (!Loop) + return; + LoopCountMap::iterator li = usingLoops_.find(Loop); + assert(li != usingLoops_.end() && "Loop missing"); + assert(li->second && "0 count in map"); + if (--li->second) + return; + // No more blocks in Loop. + usingLoops_.erase(li); +} + +// Get three sets of basic blocks surrounding a loop: Blocks inside the loop, +// predecessor blocks, and exit blocks. +void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) { + Blocks.clear(); + + // Blocks in the loop. + Blocks.Loop.insert(Loop->block_begin(), Loop->block_end()); + + // Predecessor blocks. + const MachineBasicBlock *Header = Loop->getHeader(); + for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(), + E = Header->pred_end(); I != E; ++I) + if (!Blocks.Loop.count(*I)) + Blocks.Preds.insert(*I); + + // Exit blocks. + for (MachineLoop::block_iterator I = Loop->block_begin(), + E = Loop->block_end(); I != E; ++I) { + const MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if (!Blocks.Loop.count(*SI)) + Blocks.Exits.insert(*SI); + } +} + +/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in +/// and around the Loop. +SplitAnalysis::LoopPeripheralUse SplitAnalysis:: +analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) { + LoopPeripheralUse use = ContainedInLoop; + for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); + I != E; ++I) { + const MachineBasicBlock *MBB = I->first; + // Is this a peripheral block? + if (use < MultiPeripheral && + (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) { + if (I->second > 1) use = MultiPeripheral; + else use = SinglePeripheral; + continue; + } + // Is it a loop block? + if (Blocks.Loop.count(MBB)) + continue; + // It must be an unrelated block. + return OutsideLoop; + } + return use; +} + +/// getCriticalExits - It may be necessary to partially break critical edges +/// leaving the loop if an exit block has phi uses of curli. Collect the exit +/// blocks that need special treatment into CriticalExits. 
+void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, + BlockPtrSet &CriticalExits) { + CriticalExits.clear(); + + // A critical exit block contains a phi def of curli, and has a predecessor + // that is not in the loop nor a loop predecessor. + // For such an exit block, the edges carrying the new variable must be moved + // to a new pre-exit block. + for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end(); + I != E; ++I) { + const MachineBasicBlock *Succ = *I; + SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ); + VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx); + // This exit may not have curli live in at all. No need to split. + if (!SuccVNI) + continue; + // If this is not a PHI def, it is either using a value from before the + // loop, or a value defined inside the loop. Both are safe. + if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx) + continue; + // This exit block does have a PHI. Does it also have a predecessor that is + // not a loop block or loop predecessor? + for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), + PE = Succ->pred_end(); PI != PE; ++PI) { + const MachineBasicBlock *Pred = *PI; + if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred)) + continue; + // This is a critical exit block, and we need to split the exit edge. + CriticalExits.insert(Succ); + break; + } + } +} + +/// canSplitCriticalExits - Return true if it is possible to insert new exit +/// blocks before the blocks in CriticalExits. +bool +SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, + BlockPtrSet &CriticalExits) { + // If we don't allow critical edge splitting, require no critical exits. + if (!AllowSplit) + return CriticalExits.empty(); + + for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end(); + I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // We want to insert a new pre-exit MBB before Succ, and change all the + // in-loop blocks to branch to the pre-exit instead of Succ. + // Check that all the in-loop predecessors can be changed. + for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), + PE = Succ->pred_end(); PI != PE; ++PI) { + const MachineBasicBlock *Pred = *PI; + // The external predecessors won't be altered. + if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred)) + continue; + if (!canAnalyzeBranch(Pred)) + return false; + } + + // If Succ's layout predecessor falls through, that too must be analyzable. + // We need to insert the pre-exit block in the gap. + MachineFunction::const_iterator MFI = Succ; + if (MFI == mf_.begin()) + continue; + if (!canAnalyzeBranch(--MFI)) + return false; + } + // No problems found. + return true; +} + +void SplitAnalysis::analyze(const LiveInterval *li) { + clear(); + curli_ = li; + analyzeUses(); +} + +const MachineLoop *SplitAnalysis::getBestSplitLoop() { + assert(curli_ && "Call analyze() before getBestSplitLoop"); + if (usingLoops_.empty()) + return 0; + + LoopPtrSet Loops, SecondLoops; + LoopBlocks Blocks; + BlockPtrSet CriticalExits; + + // Find first-class and second class candidate loops. + // We prefer to split around loops where curli is used outside the periphery. + for (LoopCountMap::const_iterator I = usingLoops_.begin(), + E = usingLoops_.end(); I != E; ++I) { + const MachineLoop *Loop = I->first; + getLoopBlocks(Loop, Blocks); + + // FIXME: We need an SSA updater to properly handle multiple exit blocks. 
+ if (Blocks.Exits.size() > 1) { + DEBUG(dbgs() << " multiple exits from " << *Loop); + continue; + } + + LoopPtrSet *LPS = 0; + switch(analyzeLoopPeripheralUse(Blocks)) { + case OutsideLoop: + LPS = &Loops; + break; + case MultiPeripheral: + LPS = &SecondLoops; + break; + case ContainedInLoop: + DEBUG(dbgs() << " contained in " << *Loop); + continue; + case SinglePeripheral: + DEBUG(dbgs() << " single peripheral use in " << *Loop); + continue; + } + // Will it be possible to split around this loop? + getCriticalExits(Blocks, CriticalExits); + DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from " + << *Loop); + if (!canSplitCriticalExits(Blocks, CriticalExits)) + continue; + // This is a possible split. + assert(LPS); + LPS->insert(Loop); + } + + DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + " + << SecondLoops.size() << " candidate loops.\n"); + + // If there are no first class loops available, look at second class loops. + if (Loops.empty()) + Loops = SecondLoops; + + if (Loops.empty()) + return 0; + + // Pick the earliest loop. + // FIXME: Are there other heuristics to consider? + const MachineLoop *Best = 0; + SlotIndex BestIdx; + for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E; + ++I) { + SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader()); + if (!Best || Idx < BestIdx) + Best = *I, BestIdx = Idx; + } + DEBUG(dbgs() << " getBestSplitLoop found " << *Best); + return Best; +} + +/// getMultiUseBlocks - if curli has more than one use in a basic block, it +/// may be an advantage to split curli for the duration of the block. +bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { + // If curli is local to one block, there is no point to splitting it. + if (usingBlocks_.size() <= 1) + return false; + // Add blocks with multiple uses. + for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); + I != E; ++I) + switch (I->second) { + case 0: + case 1: + continue; + case 2: { + // It doesn't pay to split a 2-instr block if it redefines curli. + VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first)); + VNInfo *VN2 = + curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex()); + // live-in and live-out with a different value. + if (VN1 && VN2 && VN1 != VN2) + continue; + } // Fall through. + default: + Blocks.insert(I->first); + } + return !Blocks.empty(); +} + +//===----------------------------------------------------------------------===// +// LiveIntervalMap +//===----------------------------------------------------------------------===// + +// defValue - Introduce a li_ def for ParentVNI that could be later than +// ParentVNI->def. +VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Is this a simple 1-1 mapping? Not likely. + if (Idx == ParentVNI->def) + return mapValue(ParentVNI, Idx); + + // This is a complex def. Mark with a NULL in valueMap. 
+ VNInfo *OldVNI = + valueMap_.insert( + ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second; + // The static_cast<VNInfo *> is only needed to work around a bug in an + // old version of the C++0x standard which the following compilers + // implemented and have yet to fix: + // + // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 + // + // If/When we move to C++0x, this can be replaced by nullptr. + (void)OldVNI; + assert(OldVNI == 0 && "Simple/Complex values mixed"); + + // Should we insert a minimal snippet of VNI LiveRange, or can we count on + // callers to do that? We need it for lookups of complex values. + VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator()); + return VNI; +} + +// mapValue - Find the mapped value for ParentVNI at Idx. +// Potentially create phi-def values. +VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator,bool> InsP = + valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))); + // The static_cast<VNInfo *> is only needed to work around a bug in an + // old version of the C++0x standard which the following compilers + // implemented and have yet to fix: + // + // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel + // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 + // + // If/When we move to C++0x, this can be replaced by nullptr. + + // This was an unknown value. Create a simple mapping. + if (InsP.second) + return InsP.first->second = li_.createValueCopy(ParentVNI, + lis_.getVNInfoAllocator()); + // This was a simple mapped value. + if (InsP.first->second) + return InsP.first->second; + + // This is a complex mapped value. There may be multiple defs, and we may need + // to create phi-defs. + MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx); + assert(IdxMBB && "No MBB at Idx"); + + // Is there a def in the same MBB we can extend? + if (VNInfo *VNI = extendTo(IdxMBB, Idx)) + return VNI; + + // Now for the fun part. We know that ParentVNI potentially has multiple defs, + // and we may need to create even more phi-defs to preserve VNInfo SSA form. + // Perform a depth-first search for predecessor blocks where we know the + // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + + // Track MBBs where we have created or learned the dominating value. + // This may change during the DFS as we create new phi-defs. + typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap; + MBBValueMap DomValue; + + for (idf_iterator<MachineBasicBlock*> + IDFI = idf_begin(IdxMBB), + IDFE = idf_end(IdxMBB); IDFI != IDFE;) { + MachineBasicBlock *MBB = *IDFI; + SlotIndex End = lis_.getMBBEndIdx(MBB); + + // We are operating on the restricted CFG where ParentVNI is live. + if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) { + IDFI.skipChildren(); + continue; + } + + // Do we have a dominating value in this block? + VNInfo *VNI = extendTo(MBB, End); + if (!VNI) { + ++IDFI; + continue; + } + + // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs + // as needed along the way. 
+    for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
+      // Start from MBB's immediate successor. End at IdxMBB.
+      MachineBasicBlock *Succ = IDFI.getPath(PI-1);
+      std::pair<MBBValueMap::iterator, bool> InsP =
+        DomValue.insert(MBBValueMap::value_type(Succ, VNI));
+
+      // This is the first time we backtrack to Succ.
+      if (InsP.second)
+        continue;
+
+      // We reached Succ again with the same VNI. Nothing is going to change.
+      VNInfo *OVNI = InsP.first->second;
+      if (OVNI == VNI)
+        break;
+
+      // Succ already has a phi-def. No need to continue.
+      SlotIndex Start = lis_.getMBBStartIdx(Succ);
+      if (OVNI->def == Start)
+        break;
+
+      // We have a collision between the old and new VNI at Succ. That means
+      // neither dominates and we need a new phi-def.
+      VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
+      VNI->setIsPHIDef(true);
+      InsP.first->second = VNI;
+
+      // Replace OVNI with VNI in the remaining path.
+      for (; PI > 1; --PI) {
+        MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
+        if (I == DomValue.end() || I->second != OVNI)
+          break;
+        I->second = VNI;
+      }
+    }
+
+    // No need to search the children, we found a dominating value.
+    IDFI.skipChildren();
+  }
+
+  // The search should at least find a dominating value for IdxMBB.
+  assert(!DomValue.empty() && "Couldn't find a reaching definition");
+
+  // Since we went through the trouble of a full DFS visiting all reaching defs,
+  // the values in DomValue are now accurate. No more phi-defs are needed for
+  // these blocks, so we can color the live ranges.
+  // This makes the next mapValue call much faster.
+  VNInfo *IdxVNI = 0;
+  for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
+       ++I) {
+    MachineBasicBlock *MBB = I->first;
+    VNInfo *VNI = I->second;
+    SlotIndex Start = lis_.getMBBStartIdx(MBB);
+    if (MBB == IdxMBB) {
+      // Don't add full liveness to IdxMBB, stop at Idx.
+      if (Start != Idx)
+        li_.addRange(LiveRange(Start, Idx, VNI));
+      // The caller had better add some liveness to IdxVNI, or it leaks.
+      IdxVNI = VNI;
+    } else
+      li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+  }
+
+  assert(IdxVNI && "Didn't find value for Idx");
+  return IdxVNI;
+}
+
+// extendTo - Find the last li_ value defined in MBB at or before Idx. The
+// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+// Return the found VNInfo, or NULL.
+VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
+  LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
+  if (I == li_.begin())
+    return 0;
+  --I;
+  if (I->start < lis_.getMBBStartIdx(MBB))
+    return 0;
+  if (I->end < Idx)
+    I->end = Idx;
+  return I->valno;
+}
+
+// addSimpleRange - Add a simple range from parentli_ to li_.
+// ParentVNI must be live in the [Start;End) interval.
+void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
+                                     const VNInfo *ParentVNI) {
+  VNInfo *VNI = mapValue(ParentVNI, Start);
+  // A simple mapping is easy.
+  if (VNI->def == ParentVNI->def) {
+    li_.addRange(LiveRange(Start, End, VNI));
+    return;
+  }
+
+  // ParentVNI is a complex value. We must map per MBB.
+  MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
+  MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+
+  if (MBB == MBBE) {
+    li_.addRange(LiveRange(Start, End, VNI));
+    return;
+  }
+
+  // First block.
+  li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+
+  // Run sequence of full blocks.
+ for (++MBB; MBB != MBBE; ++MBB) { + Start = lis_.getMBBStartIdx(MBB); + li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), + mapValue(ParentVNI, Start))); + } + + // Final block. + Start = lis_.getMBBStartIdx(MBB); + if (Start != End) + li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); +} + +/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_. +/// All needed values whose def is not inside [Start;End) must be defined +/// beforehand so mapValue will work. +void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { + LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end(); + LiveInterval::const_iterator I = std::lower_bound(B, E, Start); + + // Check if --I begins before Start and overlaps. + if (I != B) { + --I; + if (I->end > Start) + addSimpleRange(Start, std::min(End, I->end), I->valno); + ++I; + } + + // The remaining ranges begin after Start. + for (;I != E && I->start < End; ++I) + addSimpleRange(I->start, std::min(End, I->end), I->valno); +} + +//===----------------------------------------------------------------------===// +// Split Editor +//===----------------------------------------------------------------------===// + +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. +SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, + SmallVectorImpl<LiveInterval*> &intervals) + : sa_(sa), lis_(lis), vrm_(vrm), + mri_(vrm.getMachineFunction().getRegInfo()), + tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()), + curli_(sa_.getCurLI()), + dupli_(0), openli_(0), + intervals_(intervals), + firstInterval(intervals_.size()) +{ + assert(curli_ && "SplitEditor created from empty SplitAnalysis"); + + // Make sure curli_ is assigned a stack slot, so all our intervals get the + // same slot as curli_. + if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT) + vrm_.assignVirt2StackSlot(curli_->reg); + +} + +LiveInterval *SplitEditor::createInterval() { + unsigned curli = sa_.getCurLI()->reg; + unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli)); + LiveInterval &Intv = lis_.getOrCreateInterval(Reg); + vrm_.grow(); + vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli)); + return &Intv; +} + +LiveInterval *SplitEditor::getDupLI() { + if (!dupli_) { + // Create an interval for dupli that is a copy of curli. + dupli_ = createInterval(); + dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator()); + } + return dupli_; +} + +VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) { + VNInfo *&VNI = valueMap_[curliVNI]; + if (!VNI) + VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator()); + return VNI; +} + +/// Insert a COPY instruction curli -> li. Allocate a new value from li +/// defined by the COPY. Note that rewrite() will deal with the curli +/// register, so this function can be used to copy from any interval - openli, +/// curli, or dupli. +VNInfo *SplitEditor::insertCopy(LiveInterval &LI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY), + LI.reg).addReg(curli_->reg); + SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator()); +} + +/// Create a new virtual register and live interval. 
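The functions that follow form a small protocol: openIntv() starts a fresh interval, enterIntv*() copies the current value in, useIntv() attributes a range of instructions to the open interval, and leaveIntv*() copies back out. A toy state machine makes the intended call order explicit (illustrative, not the SplitEditor API):

#include <cassert>
#include <cstdio>

struct SplitEditorModel {
  bool open;
  SplitEditorModel() : open(false) {}
  void openIntv()         { assert(!open); open = true; }
  void enterBefore(int i) { assert(open); std::printf("copy-in before %d\n", i); }
  void use(int b, int e)  { assert(open); std::printf("use [%d;%d)\n", b, e); }
  void leaveAfter(int i)  { assert(open); std::printf("copy-out after %d\n", i); }
};

int main() {
  SplitEditorModel SE;    // split one value around instructions 10..20
  SE.openIntv();
  SE.enterBefore(10);
  SE.use(10, 20);
  SE.leaveAfter(20);
  return 0;
}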
+void SplitEditor::openIntv() { + assert(!openli_ && "Previous LI not closed before openIntv"); + openli_ = createInterval(); + intervals_.push_back(openli_); + liveThrough_ = false; +} + +/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is +/// not live before Idx, a COPY is not inserted. +void SplitEditor::enterIntvBefore(SlotIndex Idx) { + assert(openli_ && "openIntv not called before enterIntvBefore"); + + // Copy from curli_ if it is live. + if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) { + MachineInstr *MI = lis_.getInstructionFromIndex(Idx); + assert(MI && "enterIntvBefore called with invalid index"); + VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI); + openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI)); + + // Make sure CurVNI is properly mapped. + VNInfo *&mapVNI = valueMap_[CurVNI]; + // We dont have SSA update yet, so only one entry per value is allowed. + assert(!mapVNI && "enterIntvBefore called more than once for the same value"); + mapVNI = VNI; + } + DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n'); +} + +/// enterIntvAtEnd - Enter openli at the end of MBB. +/// PhiMBB is a successor inside openli where a PHI value is created. +/// Currently, all entries must share the same PhiMBB. +void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) { + assert(openli_ && "openIntv not called before enterIntvAtEnd"); + + SlotIndex EndA = lis_.getMBBEndIdx(&A); + VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex()); + if (!CurVNIA) { + DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#" + << A.getNumber() << ".\n"); + return; + } + + // Add a phi kill value and live range out of A. + VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator()); + openli_->addRange(LiveRange(VNIA->def, EndA, VNIA)); + + // FIXME: If this is the only entry edge, we don't need the extra PHI value. + // FIXME: If there are multiple entry blocks (so not a loop), we need proper + // SSA update. + + // Now look at the start of B. + SlotIndex StartB = lis_.getMBBStartIdx(&B); + SlotIndex EndB = lis_.getMBBEndIdx(&B); + const LiveRange *CurB = curli_->getLiveRangeContaining(StartB); + if (!CurB) { + DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#" + << B.getNumber() << ".\n"); + return; + } + + VNInfo *VNIB = openli_->getVNInfoAt(StartB); + if (!VNIB) { + // Create a phi value. + VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false, + lis_.getVNInfoAllocator()); + VNIB->setIsPHIDef(true); + VNInfo *&mapVNI = valueMap_[CurB->valno]; + if (mapVNI) { + // Multiple copies - must create PHI value. + abort(); + } else { + // This is the first copy of dupLR. Mark the mapping. + mapVNI = VNIB; + } + + } + + DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n'); +} + +/// useIntv - indicate that all instructions in MBB should use openli. +void SplitEditor::useIntv(const MachineBasicBlock &MBB) { + useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB)); +} + +void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { + assert(openli_ && "openIntv not called before useIntv"); + + // Map the curli values from the interval into openli_ + LiveInterval::const_iterator B = curli_->begin(), E = curli_->end(); + LiveInterval::const_iterator I = std::lower_bound(B, E, Start); + + if (I != B) { + --I; + // I begins before Start, but overlaps. 
+ if (I->end > Start)
+ openli_->addRange(LiveRange(Start, std::min(End, I->end),
+ mapValue(I->valno)));
+ ++I;
+ }
+
+ // The remaining ranges begin after Start.
+ for (;I != E && I->start < End; ++I)
+ openli_->addRange(LiveRange(I->start, std::min(End, I->end),
+ mapValue(I->valno)));
+ DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_
+ << '\n');
+}
+
+/// leaveIntvAfter - Leave openli after the instruction at Idx.
+void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(openli_ && "openIntv not called before leaveIntvAfter");
+
+ const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
+ if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n");
+ return;
+ }
+
+ // Was this value of curli live through openli?
+ if (!openli_->liveAt(CurLR->valno->def)) {
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n");
+ liveThrough_ = true;
+ return;
+ }
+
+ // We are going to insert a back copy, so we must have a dupli_.
+ LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
+ assert(DupLR && "dupli not live into block, but curli is?");
+
+ // Insert the COPY instruction.
+ MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
+ MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
+ tii_.get(TargetOpcode::COPY), dupli_->reg)
+ .addReg(openli_->reg);
+ SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
+ mapValue(CurLR->valno)));
+ DupLR->valno->def = CopyIdx;
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+}
+
+/// leaveIntvAtTop - Leave the interval at the top of MBB.
+/// Currently, only one value can leave the interval.
+void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(openli_ && "openIntv not called before leaveIntvAtTop");
+
+ SlotIndex Start = lis_.getMBBStartIdx(&MBB);
+ const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
+
+ // Is curli even live-in to MBB?
+ if (!CurLR) {
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n");
+ return;
+ }
+
+ // Is curli defined by PHI at the beginning of MBB?
+ bool isPHIDef = CurLR->valno->isPHIDef() &&
+ CurLR->valno->def.getBaseIndex() == Start;
+
+ // If MBB is using a value of curli that was defined outside the openli range,
+ // we don't want to copy it back here.
+ if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Start
+ << ": using external value\n");
+ liveThrough_ = true;
+ return;
+ }
+
+ // We are going to insert a back copy, so we must have a dupli_.
+ LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
+ assert(DupLR && "dupli not live into block, but curli is?");
+
+ // Insert the COPY instruction.
+ MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
+ tii_.get(TargetOpcode::COPY), dupli_->reg)
+ .addReg(openli_->reg);
+ SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+
+ // Adjust dupli and openli values.
+ if (isPHIDef) {
+ // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
+ // and shift the dupli def down to the COPY.
+ VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, + lis_.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + openli_->addRange(LiveRange(VNI->def, Idx, VNI)); + + dupli_->removeRange(Start, Idx); + DupLR->valno->def = Idx; + DupLR->valno->setIsPHIDef(false); + } else { + // The dupli value was defined somewhere inside the openli range. + DEBUG(dbgs() << " leaveIntvAtTop source value defined at " + << DupLR->valno->def << "\n"); + // FIXME: We may not need a PHI here if all predecessors have the same + // value. + VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, + lis_.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + openli_->addRange(LiveRange(VNI->def, Idx, VNI)); + + // FIXME: What if DupLR->valno is used by multiple exits? SSA Update. + + // closeIntv is going to remove the superfluous live ranges. + DupLR->valno->def = Idx; + DupLR->valno->setIsPHIDef(false); + } + + DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n'); +} + +/// closeIntv - Indicate that we are done editing the currently open +/// LiveInterval, and ranges can be trimmed. +void SplitEditor::closeIntv() { + assert(openli_ && "openIntv not called before closeIntv"); + + DEBUG(dbgs() << " closeIntv cleaning up\n"); + DEBUG(dbgs() << " open " << *openli_ << '\n'); + + if (liveThrough_) { + DEBUG(dbgs() << " value live through region, leaving dupli as is.\n"); + } else { + // live out with copies inserted, or killed by region. Either way we need to + // remove the overlapping region from dupli. + getDupLI(); + for (LiveInterval::iterator I = openli_->begin(), E = openli_->end(); + I != E; ++I) { + dupli_->removeRange(I->start, I->end); + } + // FIXME: A block branching to the entry block may also branch elsewhere + // curli is live. We need both openli and curli to be live in that case. + DEBUG(dbgs() << " dup2 " << *dupli_ << '\n'); + } + openli_ = 0; + valueMap_.clear(); +} + +/// rewrite - after all the new live ranges have been created, rewrite +/// instructions using curli to use the new intervals. +void SplitEditor::rewrite() { + assert(!openli_ && "Previous LI not closed before rewrite"); + const LiveInterval *curli = sa_.getCurLI(); + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg), + RE = mri_.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MI->isDebugValue()) { + DEBUG(dbgs() << "Zapping " << *MI); + // FIXME: We can do much better with debug values. + MO.setReg(0); + continue; + } + SlotIndex Idx = lis_.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + LiveInterval *LI = dupli_; + for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { + LiveInterval *testli = intervals_[i]; + if (testli->liveAt(Idx)) { + LI = testli; + break; + } + } + if (LI) { + MO.setReg(LI->reg); + sa_.removeUse(MI); + DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI); + } + } + + // dupli_ goes in last, after rewriting. + if (dupli_) { + if (dupli_->empty()) { + DEBUG(dbgs() << " dupli became empty?\n"); + lis_.removeInterval(dupli_->reg); + dupli_ = 0; + } else { + dupli_->RenumberValues(lis_); + intervals_.push_back(dupli_); + } + } + + // Calculate spill weight and allocation hints for new intervals. 
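closeIntv, earlier in this hunk, repeatedly calls removeRange to carve openli's segments out of dupli. One such subtraction, reduced to plain half-open ranges (a sketch under assumed simplified types, not LiveInterval::removeRange itself):

#include <utility>
#include <vector>

using Range = std::pair<unsigned, unsigned>; // half-open [start;end)

// Delete [S;E) from a sorted range list, keeping any left-over pieces.
void removeRange(std::vector<Range> &Ranges, unsigned S, unsigned E) {
  std::vector<Range> Out;
  Out.reserve(Ranges.size() + 1);
  for (const Range &R : Ranges) {
    if (R.second <= S || R.first >= E) {
      Out.push_back(R);                  // No overlap: keep unchanged.
      continue;
    }
    if (R.first < S)
      Out.push_back(Range(R.first, S));  // Keep the piece left of S.
    if (R.second > E)
      Out.push_back(Range(E, R.second)); // Keep the piece right of E.
  }
  Ranges.swap(Out);
}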
+ VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_); + for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { + LiveInterval &li = *intervals_[i]; + vrai.CalculateRegClass(li.reg); + vrai.CalculateWeightAndHint(li); + DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName() + << ":" << li << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Loop Splitting +//===----------------------------------------------------------------------===// + +bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { + SplitAnalysis::LoopBlocks Blocks; + sa_.getLoopBlocks(Loop, Blocks); + + // Break critical edges as needed. + SplitAnalysis::BlockPtrSet CriticalExits; + sa_.getCriticalExits(Blocks, CriticalExits); + assert(CriticalExits.empty() && "Cannot break critical exits yet"); + + // Create new live interval for the loop. + openIntv(); + + // Insert copies in the predecessors. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(), + E = Blocks.Preds.end(); I != E; ++I) { + MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); + enterIntvAtEnd(MBB, *Loop->getHeader()); + } + + // Switch all loop blocks. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(), + E = Blocks.Loop.end(); I != E; ++I) + useIntv(**I); + + // Insert back copies in the exit blocks. + for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(), + E = Blocks.Exits.end(); I != E; ++I) { + MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); + leaveIntvAtTop(MBB); + } + + // Done. + closeIntv(); + rewrite(); + return dupli_; +} + + +//===----------------------------------------------------------------------===// +// Single Block Splitting +//===----------------------------------------------------------------------===// + +/// splitSingleBlocks - Split curli into a separate live interval inside each +/// basic block in Blocks. Return true if curli has been completely replaced, +/// false if curli is still intact, and needs to be spilled or split further. +bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { + DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); + // Determine the first and last instruction using curli in each block. + typedef std::pair<SlotIndex,SlotIndex> IndexPair; + typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap; + IndexPairMap MBBRange; + for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(), + E = sa_.usingInstrs_.end(); I != E; ++I) { + const MachineBasicBlock *MBB = (*I)->getParent(); + if (!Blocks.count(MBB)) + continue; + SlotIndex Idx = lis_.getInstructionIndex(*I); + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I); + IndexPair &IP = MBBRange[MBB]; + if (!IP.first.isValid() || Idx < IP.first) + IP.first = Idx; + if (!IP.second.isValid() || Idx > IP.second) + IP.second = Idx; + } + + // Create a new interval for each block. 
+ for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ IndexPair &IP = MBBRange[*I];
+ DEBUG(dbgs() << " splitting for BB#" << (*I)->getNumber() << ": ["
+ << IP.first << ';' << IP.second << ")\n");
+ assert(IP.first.isValid() && IP.second.isValid());
+
+ openIntv();
+ enterIntvBefore(IP.first);
+ useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
+ leaveIntvAfter(IP.second);
+ closeIntv();
+ }
+ rewrite();
+ return dupli_;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sub Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// and it would pay to subdivide the interval inside that block, return it.
+/// Otherwise return NULL. The returned block can be passed to
+/// SplitEditor::splitInsideBlock.
+const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
+ // The interval must be exclusive to one block.
+ if (usingBlocks_.size() != 1)
+ return 0;
+ // Don't do this for less than 4 instructions. We want to be sure that
+ // splitting actually reduces the instruction count per interval.
+ if (usingInstrs_.size() < 4)
+ return 0;
+ return usingBlocks_.begin()->first;
+}
+
+/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+/// true if curli has been completely replaced, false if curli is still
+/// intact, and needs to be spilled or split further.
+bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+ SmallVector<SlotIndex, 32> Uses;
+ Uses.reserve(sa_.usingInstrs_.size());
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+ E = sa_.usingInstrs_.end(); I != E; ++I)
+ if ((*I)->getParent() == MBB)
+ Uses.push_back(lis_.getInstructionIndex(*I));
+ DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for "
+ << Uses.size() << " instructions.\n");
+ assert(Uses.size() >= 3 && "Need at least 3 instructions");
+ array_pod_sort(Uses.begin(), Uses.end());
+
+ // Simple algorithm: Find the largest gap between uses as determined by slot
+ // indices. Create new intervals for instructions before the gap and after the
+ // gap.
+ unsigned bestPos = 0;
+ int bestGap = 0;
+ DEBUG(dbgs() << " dist (" << Uses[0]);
+ for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+ int g = Uses[i-1].distance(Uses[i]);
+ DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
+ if (g > bestGap)
+ bestPos = i, bestGap = g;
+ }
+ DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
+
+ // bestPos points to the first use after the best gap.
+ assert(bestPos > 0 && "Invalid gap");
+
+ // FIXME: Don't create intervals for low densities.
+
+ // First interval before the gap. Don't create single-instr intervals.
+ if (bestPos > 1) {
+ openIntv();
+ enterIntvBefore(Uses.front());
+ useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
+ leaveIntvAfter(Uses[bestPos-1]);
+ closeIntv();
+ }
+
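The gap search above is a plain linear scan over the sorted use slots; distilled to integers (a sketch):

#include <cstddef>
#include <vector>

// Return the index of the first use after the widest gap, or 0 if the
// positions never increase.
std::size_t largestGapPos(const std::vector<int> &Uses) {
  std::size_t BestPos = 0;
  int BestGap = 0;
  for (std::size_t i = 1; i < Uses.size(); ++i) {
    int Gap = Uses[i] - Uses[i - 1];
    if (Gap > BestGap) {
      BestPos = i;   // First use after the widest gap seen so far.
      BestGap = Gap;
    }
  }
  return BestPos;
}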
+ // Second interval after the gap.
+ if (bestPos < Uses.size()-1) {
+ openIntv();
+ enterIntvBefore(Uses[bestPos]);
+ useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
+ leaveIntvAfter(Uses.back());
+ closeIntv();
+ }
+
+ rewrite();
+ return dupli_;
+}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
new file mode 100644
index 0000000000000..ddef7461dc3d6
--- /dev/null
+++ b/lib/CodeGen/SplitKit.h
@@ -0,0 +1,321 @@
+//===----------- SplitKit.h - Toolkit for splitting live ranges -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class VirtRegMap;
+class VNInfo;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &mf_;
+ const LiveIntervals &lis_;
+ const MachineLoopInfo &loops_;
+ const TargetInstrInfo &tii_;
+
+ // Instructions using the current register.
+ typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
+ InstrPtrSet usingInstrs_;
+
+ // The number of instructions using curli in each basic block.
+ typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
+ BlockCountMap usingBlocks_;
+
+ // The number of basic blocks using curli in each loop.
+ typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
+ LoopCountMap usingLoops_;
+
+private:
+ // Current live interval.
+ const LiveInterval *curli_;
+
+ // Summarize statistics by counting instructions using curli_.
+ void analyzeUses();
+
+ /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
+ /// analyzed.
+ bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+
+public:
+ SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set curli to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// removeUse - Update statistics by noting that mi no longer uses curli.
+ void removeUse(const MachineInstr *mi);
+
+ const LiveInterval *getCurLI() { return curli_; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+ typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
+
+ // Sets of basic blocks surrounding a machine loop.
+ struct LoopBlocks {
+ BlockPtrSet Loop; // Blocks in the loop.
+ BlockPtrSet Preds; // Loop predecessor blocks.
+ BlockPtrSet Exits; // Loop exit blocks.
+
+ void clear() {
+ Loop.clear();
+ Preds.clear();
+ Exits.clear();
+ }
+ };
+
+ // Calculate the block sets surrounding the loop.
+ void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
+
+ /// LoopPeripheralUse - how is a variable used in and around a loop?
+ /// Peripheral blocks are the loop predecessors and exit blocks.
+ enum LoopPeripheralUse {
+ ContainedInLoop, // All uses are inside the loop.
+ SinglePeripheral, // At most one instruction per peripheral block.
+ MultiPeripheral, // Multiple instructions in some peripheral blocks.
+ OutsideLoop // Uses outside loop periphery.
+ };
+
+ /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+ /// and around the Loop.
+ LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+
+ /// getCriticalExits - It may be necessary to partially break critical edges
+ /// leaving the loop if an exit block has phi uses of curli. Collect the exit
+ /// blocks that need special treatment into CriticalExits.
+ void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+
+ /// canSplitCriticalExits - Return true if it is possible to insert new exit
+ /// blocks before the blocks in CriticalExits.
+ bool canSplitCriticalExits(const LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits);
+
+ /// getBestSplitLoop - Return the loop where curli may best be split to a
+ /// separate register, or NULL.
+ const MachineLoop *getBestSplitLoop();
+
+ /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+ /// having curli split to a new live interval. Return true if Blocks can be
+ /// passed to SplitEditor::splitSingleBlocks.
+ bool getMultiUseBlocks(BlockPtrSet &Blocks);
+
+ /// getBlockForInsideSplit - If curli is contained inside a single basic block,
+ /// and it would pay to subdivide the interval inside that block, return it.
+ /// Otherwise return NULL. The returned block can be passed to
+ /// SplitEditor::splitInsideBlock.
+ const MachineBasicBlock *getBlockForInsideSplit();
+};
+
+
+/// LiveIntervalMap - Map values from a large LiveInterval into a small
+/// interval that is a subset. Insert phi-def values as needed. This class is
+/// used by SplitEditor to create new smaller LiveIntervals.
+///
+/// parentli_ is the larger interval, li_ is the subset interval. Every value
+/// in li_ corresponds to exactly one value in parentli_, and the live range
+/// of the value is contained within the live range of the parentli_ value.
+/// Values in parentli_ may map to any number of openli_ values, including 0.
+class LiveIntervalMap {
+ LiveIntervals &lis_;
+
+ // The parent interval is never changed.
+ const LiveInterval &parentli_;
+
+ // The child interval's values are fully contained inside parentli_ values.
+ LiveInterval &li_;
+
+ typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+
+ // Map parentli_ values to simple values in li_ that are defined at the same
+ // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+ // Note there is a difference between values mapping to NULL (complex), and
+ // values not present (unknown/unmapped).
+ ValueMap valueMap_;
+
+ // extendTo - Find the last li_ value defined in MBB at or before Idx. The
+ // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+ // Return the found VNInfo, or NULL.
+ VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
+
+ // addSimpleRange - Add a simple range from parentli_ to li_.
+ // ParentVNI must be live in the [Start;End) interval.
+ void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+
+public:
+ LiveIntervalMap(LiveIntervals &lis,
+ const LiveInterval &parentli,
+ LiveInterval &li)
+ : lis_(lis), parentli_(parentli), li_(li) {}
+
+ /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in parentli_.
+ /// Return the new li_ value.
+ VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+ /// assumed that ParentVNI is live at Idx.
+ /// If ParentVNI has not been defined by defValue, it is assumed that
+ /// ParentVNI->def dominates Idx.
+ /// If ParentVNI has been defined by defValue one or more times, a value that
+ /// dominates Idx will be returned. This may require creating extra phi-def
+ /// values and adding live ranges to li_.
+ VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+ /// All needed values whose def is not inside [Start;End) must be defined
+ /// beforehand so mapValue will work.
+ void addRange(SlotIndex Start, SlotIndex End);
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with exitIntv*.
+/// - Finish the current interval with closeIntv and repeat from 2.
+/// - Rewrite instructions with rewrite().
+///
+class SplitEditor {
+ SplitAnalysis &sa_;
+ LiveIntervals &lis_;
+ VirtRegMap &vrm_;
+ MachineRegisterInfo &mri_;
+ const TargetInstrInfo &tii_;
+
+ /// curli_ - The immutable interval we are currently splitting.
+ const LiveInterval *const curli_;
+
+ /// dupli_ - Created as a copy of curli_, ranges are carved out as new
+ /// intervals get added through openIntv / closeIntv. This is used to avoid
+ /// editing curli_.
+ LiveInterval *dupli_;
+
+ /// Currently open LiveInterval.
+ LiveInterval *openli_;
+
+ /// createInterval - Create a new virtual register and LiveInterval with same
+ /// register class and spill slot as curli.
+ LiveInterval *createInterval();
+
+ /// getDupLI - Ensure dupli is created and return it.
+ LiveInterval *getDupLI();
+
+ /// valueMap_ - Map values in curli to values in openli. These are direct 1-1
+ /// mappings, and do not include values created by inserted copies.
+ DenseMap<const VNInfo*, VNInfo*> valueMap_;
+
+ /// mapValue - Return the openIntv value that corresponds to the given curli
+ /// value.
+ VNInfo *mapValue(const VNInfo *curliVNI);
+
+ /// A dupli value is live through openIntv.
+ bool liveThrough_;
+
+ /// All the new intervals created for this split are added to intervals_.
+ SmallVectorImpl<LiveInterval*> &intervals_;
+
+ /// The index into intervals_ of the first interval we added. There may be
+ /// others from before we got it.
+ unsigned firstInterval;
+
+ /// Insert a COPY instruction curli -> li. Allocate a new value from li
+ /// defined by the COPY.
+ VNInfo *insertCopy(LiveInterval &LI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals.
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ SmallVectorImpl<LiveInterval*> &newIntervals);
+
+ /// getAnalysis - Get the corresponding analysis.
+ SplitAnalysis &getAnalysis() { return sa_; }
+
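The enter/use/leave calls below are driven in the sequence the class comment above lists. A minimal sketch of one single-block split, mirroring what splitSingleBlocks does per block (FirstUse and LastUse are assumed to be the block-local use bounds, computed by the caller):

#include "SplitKit.h"
using namespace llvm;

void splitOneBlock(SplitEditor &SE, SlotIndex FirstUse, SlotIndex LastUse) {
  SE.openIntv();                        // Start a new live interval.
  SE.enterIntvBefore(FirstUse);         // Copy in before the first use.
  SE.useIntv(FirstUse.getBaseIndex(),   // Claim the block-local range.
             LastUse.getBoundaryIndex());
  SE.leaveIntvAfter(LastUse);           // Copy back out after the last use.
  SE.closeIntv();                       // Trim and finish this interval.
}

+ /// Create a new virtual register and live interval.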
+ void openIntv(); + + /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is + /// not live before Idx, a COPY is not inserted. + void enterIntvBefore(SlotIndex Idx); + + /// enterIntvAtEnd - Enter openli at the end of MBB. + /// PhiMBB is a successor inside openli where a PHI value is created. + /// Currently, all entries must share the same PhiMBB. + void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB); + + /// useIntv - indicate that all instructions in MBB should use openli. + void useIntv(const MachineBasicBlock &MBB); + + /// useIntv - indicate that all instructions in range should use openli. + void useIntv(SlotIndex Start, SlotIndex End); + + /// leaveIntvAfter - Leave openli after the instruction at Idx. + void leaveIntvAfter(SlotIndex Idx); + + /// leaveIntvAtTop - Leave the interval at the top of MBB. + /// Currently, only one value can leave the interval. + void leaveIntvAtTop(MachineBasicBlock &MBB); + + /// closeIntv - Indicate that we are done editing the currently open + /// LiveInterval, and ranges can be trimmed. + void closeIntv(); + + /// rewrite - after all the new live ranges have been created, rewrite + /// instructions using curli to use the new intervals. + void rewrite(); + + // ===--- High level methods ---=== + + /// splitAroundLoop - Split curli into a separate live interval inside + /// the loop. Return true if curli has been completely replaced, false if + /// curli is still intact, and needs to be spilled or split further. + bool splitAroundLoop(const MachineLoop*); + + /// splitSingleBlocks - Split curli into a separate live interval inside each + /// basic block in Blocks. Return true if curli has been completely replaced, + /// false if curli is still intact, and needs to be spilled or split further. + bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + + /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return + /// true if curli has been completely replaced, false if curli is still + /// intact, and needs to be spilled or split further. + bool splitInsideBlock(const MachineBasicBlock *); +}; + +} diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp new file mode 100644 index 0000000000000..38f3b1f4d35ea --- /dev/null +++ b/lib/CodeGen/Splitter.cpp @@ -0,0 +1,817 @@ +//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loopsplitter" + +#include "Splitter.h" + +#include "SimpleRegisterCoalescing.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +char LoopSplitter::ID = 0; +INITIALIZE_PASS(LoopSplitter, "loop-splitting", + "Split virtual regists across loop boundaries.", false, false); + +namespace llvm { + + class StartSlotComparator { + public: + StartSlotComparator(LiveIntervals &lis) : lis(lis) {} + bool operator()(const MachineBasicBlock *mbb1, + const MachineBasicBlock *mbb2) const { + return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2); + } + private: + LiveIntervals &lis; + }; + + class LoopSplit { + public: + LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop) + : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) { + assert(TargetRegisterInfo::isVirtualRegister(li.reg) && + "Cannot split physical registers."); + } + + LiveInterval& getLI() const { return li; } + + MachineLoop& getLoop() const { return loop; } + + bool isValid() const { return valid; } + + bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); } + + void invalidate() { valid = false; } + + void splitIncoming() { inSplit = true; } + + void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); } + + void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); } + + void apply() { + assert(valid && "Attempt to apply invalid split."); + applyIncoming(); + applyOutgoing(); + copyRanges(); + renameInside(); + } + + private: + LoopSplitter &ls; + LiveInterval &li; + MachineLoop &loop; + bool valid, inSplit; + std::set<MachineLoop::Edge> outSplits; + std::vector<MachineInstr*> loopInstrs; + + LiveInterval *newLI; + std::map<VNInfo*, VNInfo*> vniMap; + + LiveInterval* getNewLI() { + if (newLI == 0) { + const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg); + unsigned vreg = ls.mri->createVirtualRegister(trc); + newLI = &ls.lis->getOrCreateInterval(vreg); + } + return newLI; + } + + VNInfo* getNewVNI(VNInfo *oldVNI) { + VNInfo *newVNI = vniMap[oldVNI]; + + if (newVNI == 0) { + newVNI = getNewLI()->createValueCopy(oldVNI, + ls.lis->getVNInfoAllocator()); + vniMap[oldVNI] = newVNI; + } + + return newVNI; + } + + void applyIncoming() { + if (!inSplit) { + return; + } + + MachineBasicBlock *preHeader = loop.getLoopPreheader(); + if (preHeader == 0) { + assert(ls.canInsertPreHeader(loop) && + "Can't insert required preheader."); + preHeader = &ls.insertPreHeader(loop); + } + + LiveRange *preHeaderRange = + ls.lis->findExitingRange(li, preHeader); + assert(preHeaderRange != 0 && "Range not live into preheader."); + + // Insert the new copy. 
+ MachineInstr *copy = BuildMI(*preHeader, + preHeader->getFirstTerminator(), + DebugLoc(), + ls.tii->get(TargetOpcode::COPY)) + .addReg(getNewLI()->reg, RegState::Define) + .addReg(li.reg, RegState::Kill); + + ls.lis->InsertMachineInstrInMaps(copy); + + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + + VNInfo *newVal = getNewVNI(preHeaderRange->valno); + newVal->def = copyDefIdx; + newVal->setCopy(copy); + newVal->setIsDefAccurate(true); + li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); + + getNewLI()->addRange(LiveRange(copyDefIdx, + ls.lis->getMBBEndIdx(preHeader), + newVal)); + } + + void applyOutgoing() { + + for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(), + osEnd = outSplits.end(); + osItr != osEnd; ++osItr) { + MachineLoop::Edge edge = *osItr; + MachineBasicBlock *outBlock = edge.second; + if (ls.isCriticalEdge(edge)) { + assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); + outBlock = &ls.splitEdge(edge, loop); + } + LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); + assert(outRange != 0 && "No exiting range?"); + + MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), + DebugLoc(), + ls.tii->get(TargetOpcode::COPY)) + .addReg(li.reg, RegState::Define) + .addReg(getNewLI()->reg, RegState::Kill); + + ls.lis->InsertMachineInstrInMaps(copy); + + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + + // Blow away output range definition. + outRange->valno->def = ls.lis->getInvalidIndex(); + outRange->valno->setIsDefAccurate(false); + li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); + + VNInfo *newVal = + getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock), + true), + 0, false, ls.lis->getVNInfoAllocator()); + + getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), + copyDefIdx, newVal)); + + } + } + + void copyRange(LiveRange &lr) { + std::pair<bool, LoopSplitter::SlotPair> lsr = + ls.getLoopSubRange(lr, loop); + + if (!lsr.first) + return; + + LiveRange loopRange(lsr.second.first, lsr.second.second, + getNewVNI(lr.valno)); + + li.removeRange(loopRange.start, loopRange.end, true); + + getNewLI()->addRange(loopRange); + } + + void copyRanges() { + for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); + if (instr.modifiesRegister(li.reg, 0)) { + LiveRange *defRange = + li.getLiveRangeContaining(instrIdx.getDefIndex()); + if (defRange != 0) // May have caught this already. + copyRange(*defRange); + } + if (instr.readsRegister(li.reg, 0)) { + LiveRange *useRange = + li.getLiveRangeContaining(instrIdx.getUseIndex()); + if (useRange != 0) { // May have caught this already. 
+ copyRange(*useRange); + } + } + } + + for (MachineLoop::block_iterator bbItr = loop.block_begin(), + bbEnd = loop.block_end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock &loopBlock = **bbItr; + LiveRange *enteringRange = + ls.lis->findEnteringRange(li, &loopBlock); + if (enteringRange != 0) { + copyRange(*enteringRange); + } + } + } + + void renameInside() { + for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + for (unsigned i = 0; i < instr.getNumOperands(); ++i) { + MachineOperand &mop = instr.getOperand(i); + if (mop.isReg() && mop.getReg() == li.reg) { + mop.setReg(getNewLI()->reg); + } + } + } + } + + }; + + void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + au.addPreserved<RegisterCoalescer>(); + au.addPreserved<CalculateSpillWeights>(); + au.addPreserved<LiveStacks>(); + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); + sis = &getAnalysis<SlotIndexes>(); + lis = &getAnalysis<LiveIntervals>(); + mli = &getAnalysis<MachineLoopInfo>(); + mdt = &getAnalysis<MachineDominatorTree>(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + + mf->getFunction()->getName().str(); + + dbgs() << "Splitting " << mf->getFunction()->getName() << "."; + + dumpOddTerminators(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + +// std::deque<MachineLoop*> loops; +// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); +// dbgs() << "Loops:\n"; +// while (!loops.empty()) { +// MachineLoop &loop = *loops.front(); +// loops.pop_front(); +// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + +// dumpLoopInfo(loop); +// } + + //lis->dump(); + //exit(0); + + // Setup initial intervals. 
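renameInside, earlier in this hunk, is a straightforward operand walk. On assumed stand-in types it amounts to the following (a sketch, not the pass's own code):

#include <vector>

struct Operand { bool IsReg; unsigned Reg; }; // stand-in for MachineOperand
struct Instr { std::vector<Operand> Ops; };   // stand-in for MachineInstr

// Point every operand that referenced OldReg at NewReg; this is all that
// remains once the loop-local register and its live ranges exist.
void renameUses(std::vector<Instr*> &LoopInstrs, unsigned OldReg,
                unsigned NewReg) {
  for (Instr *I : LoopInstrs)
    for (Operand &Op : I->Ops)
      if (Op.IsReg && Op.Reg == OldReg)
        Op.Reg = NewReg;
}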
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (TargetRegisterInfo::isVirtualRegister(li->reg) && + !lis->intervalIsInOneMBB(*li)) { + intervals.push_back(li); + } + } + + processIntervals(); + + intervals.clear(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + + dumpOddTerminators(); + + //exit(1); + + return false; + } + + void LoopSplitter::releaseMemory() { + fqn.clear(); + intervals.clear(); + loopRangeMap.clear(); + } + + void LoopSplitter::dumpOddTerminators() { + for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock *mbb = &*bbItr; + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + if (tii->AnalyzeBranch(*mbb, a, b, c)) { + dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; + dbgs() << " Terminators:\n"; + for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); + iItr != iEnd; ++iItr) { + MachineInstr *instr= &*iItr; + dbgs() << " " << *instr << ""; + } + dbgs() << "\n Listed successors: [ "; + for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); + sItr != sEnd; ++sItr) { + MachineBasicBlock *succMBB = *sItr; + dbgs() << succMBB->getNumber() << " "; + } + dbgs() << "]\n\n"; + } + } + } + + void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { + MachineBasicBlock &headerBlock = *loop.getHeader(); + typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; + ExitEdgesList exitEdges; + loop.getExitEdges(exitEdges); + + dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; + for (std::vector<MachineBasicBlock*>::const_iterator + subBlockItr = loop.getBlocks().begin(), + subBlockEnd = loop.getBlocks().end(); + subBlockItr != subBlockEnd; ++subBlockItr) { + MachineBasicBlock &subBlock = **subBlockItr; + dbgs() << "BB#" << subBlock.getNumber() << " "; + } + dbgs() << "], Exit edges: [ "; + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge &exitEdge = *exitEdgeItr; + dbgs() << "(MBB#" << exitEdge.first->getNumber() + << ", MBB#" << exitEdge.second->getNumber() << ") "; + } + dbgs() << "], Sub-Loop Headers: [ "; + for (MachineLoop::iterator subLoopItr = loop.begin(), + subLoopEnd = loop.end(); + subLoopItr != subLoopEnd; ++subLoopItr) { + MachineLoop &subLoop = **subLoopItr; + MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); + dbgs() << "BB#" << subLoopBlock.getNumber() << " "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { + mbb.updateTerminator(); + + for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); + miItr != miEnd; ++miItr) { + if (lis->isNotInMIMap(miItr)) { + lis->InsertMachineInstrInMaps(miItr); + } + } + } + + bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { + MachineBasicBlock *header = loop.getHeader(); + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + + for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), + pbEnd = header->pred_end(); + pbItr != pbEnd; ++pbItr) { + MachineBasicBlock *predBlock = *pbItr; + if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { + return false; + } + } + + MachineFunction::iterator headerItr(header); + if (headerItr == mf->begin()) + return true; + 
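canInsertPreHeader above only vets the branches; insertPreHeader, which follows, does the rewiring: every edge into the header that does not come from inside the loop is redirected to a fresh block that falls through to the header. The rewiring step on a toy CFG (assumed simplified types, not the MachineFunction API):

#include <cstddef>
#include <set>
#include <vector>

struct Block { std::vector<int> succs; }; // block i's successor list

int insertPreheader(std::vector<Block> &CFG, int Header,
                    const std::set<int> &LoopBlocks) {
  int Pre = static_cast<int>(CFG.size());
  CFG.push_back(Block());                // New block falling through...
  CFG.back().succs.push_back(Header);    // ...to the loop header.
  for (int B = 0; B != Pre; ++B) {
    if (LoopBlocks.count(B))
      continue;                          // Keep back edges intact.
    for (std::size_t i = 0; i != CFG[B].succs.size(); ++i)
      if (CFG[B].succs[i] == Header)
        CFG[B].succs[i] = Pre;           // Edge now enters the preheader.
  }
  return Pre;
}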
MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); + assert(headerLayoutPred != 0 && "Header should have layout pred."); + + return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { + assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); + + MachineBasicBlock &header = *loop.getHeader(); + + // Save the preds - we'll need to update them once we insert the preheader. + typedef std::set<MachineBasicBlock*> HeaderPreds; + HeaderPreds headerPreds; + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (!loop.contains(*predItr)) + headerPreds.insert(*predItr); + } + + assert(!headerPreds.empty() && "No predecessors for header?"); + + //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; + + MachineBasicBlock *preHeader = + mf->CreateMachineBasicBlock(header.getBasicBlock()); + + assert(preHeader != 0 && "Failed to create pre-header."); + + mf->insert(header, preHeader); + + for (HeaderPreds::iterator hpItr = headerPreds.begin(), + hpEnd = headerPreds.end(); + hpItr != hpEnd; ++hpItr) { + assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); + MachineBasicBlock &hp = **hpItr; + hp.ReplaceUsesOfBlockWith(&header, preHeader); + } + preHeader->addSuccessor(&header); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(preHeader)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(preHeader); + + if (MachineLoop *parentLoop = loop.getParentLoop()) { + assert(parentLoop->getHeader() != loop.getHeader() && + "Parent loop has same header?"); + parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); + + // Invalidate all parent loop ranges. + while (parentLoop != 0) { + loopRangeMap.erase(parentLoop); + parentLoop = parentLoop->getParentLoop(); + } + } + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + + // Is this safe for physregs? + // TargetRegisterInfo::isPhysicalRegister(li.reg) || + if (!lis->isLiveInToMBB(li, &header)) + continue; + + if (lis->isLiveInToMBB(li, preHeader)) { + assert(lis->isLiveOutOfMBB(li, preHeader) && + "Range terminates in newly added preheader?"); + continue; + } + + bool insertRange = false; + + for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), + predEnd = preHeader->pred_end(); + predItr != predEnd; ++predItr) { + MachineBasicBlock *predMBB = *predItr; + if (lis->isLiveOutOfMBB(li, predMBB)) { + insertRange = true; + break; + } + } + + if (!insertRange) + continue; + + VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader), + 0, false, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), + lis->getMBBEndIdx(preHeader), + newVal)); + } + + + //dbgs() << "Dumping SlotIndexes:\n"; + //sis->dump(); + + //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; + + return *preHeader; + } + + bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { + assert(edge.first->succ_size() > 1 && "Non-sensical edge."); + if (edge.second->pred_size() > 1) + return true; + return false; + } + + bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { + MachineFunction::iterator outBlockItr(edge.second); + if (outBlockItr == mf->begin()) + return true; + MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); + assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); + MachineBasicBlock *a = 0, *b = 0; + SmallVector<MachineOperand, 4> c; + return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && + !tii->AnalyzeBranch(*edge.first, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, + MachineLoop &loop) { + + MachineBasicBlock &inBlock = *edge.first; + MachineBasicBlock &outBlock = *edge.second; + + assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && + "Splitting non-critical edge?"); + + //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() + // << " -> MBB#" << outBlock.getNumber() << ")..."; + + MachineBasicBlock *splitBlock = + mf->CreateMachineBasicBlock(); + + assert(splitBlock != 0 && "Failed to create split block."); + + mf->insert(&outBlock, splitBlock); + + inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); + splitBlock->addSuccessor(&outBlock); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(splitBlock)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(splitBlock); + + loopRangeMap.erase(&loop); + + MachineLoop *splitParentLoop = loop.getParentLoop(); + while (splitParentLoop != 0 && + !splitParentLoop->contains(&outBlock)) { + splitParentLoop = splitParentLoop->getParentLoop(); + } + + if (splitParentLoop != 0) { + assert(splitParentLoop->contains(&loop) && + "Split-block parent doesn't contain original loop?"); + splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); + + // Invalidate all parent loop ranges. + while (splitParentLoop != 0) { + loopRangeMap.erase(splitParentLoop); + splitParentLoop = splitParentLoop->getParentLoop(); + } + } + + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && + lis->isLiveInToMBB(li, &outBlock); + if (lis->isLiveInToMBB(li, splitBlock)) { + if (!intersects) { + li.removeRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), true); + } + } else if (intersects) { + VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock), + 0, false, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), + newVal)); + } + } + + //dbgs() << "done. 
(Added MBB#" << splitBlock->getNumber() << ")\n"; + + return *splitBlock; + } + + LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { + typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet; + LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); + if (lrItr == loopRangeMap.end()) { + LoopMBBSet loopMBBs((StartSlotComparator(*lis))); + std::copy(loop.block_begin(), loop.block_end(), + std::inserter(loopMBBs, loopMBBs.begin())); + + assert(!loopMBBs.empty() && "No blocks in loop?"); + + LoopRanges &loopRanges = loopRangeMap[&loop]; + assert(loopRanges.empty() && "Loop encountered but not processed?"); + SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); + loopRanges.push_back( + std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), + lis->getInvalidIndex())); + for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), + curBlockEnd = loopMBBs.end(); + curBlockItr != curBlockEnd; ++curBlockItr) { + SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); + if (newStart != oldEnd) { + loopRanges.back().second = oldEnd; + loopRanges.push_back(std::make_pair(newStart, + lis->getInvalidIndex())); + } + oldEnd = lis->getMBBEndIdx(*curBlockItr); + } + + loopRanges.back().second = + lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); + + return loopRanges; + } + return lrItr->second; + } + + std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange( + const LiveRange &lr, + MachineLoop &loop) { + LoopRanges &loopRanges = getLoopRanges(loop); + LoopRanges::iterator lrItr = loopRanges.begin(), + lrEnd = loopRanges.end(); + while (lrItr != lrEnd && lr.start >= lrItr->second) { + ++lrItr; + } + + if (lrItr == lrEnd) { + SlotIndex invalid = lis->getInvalidIndex(); + return std::make_pair(false, SlotPair(invalid, invalid)); + } + + SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); + SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end); + + return std::make_pair(true, SlotPair(srStart, srEnd)); + } + + void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { + LoopRanges &loopRanges = getLoopRanges(loop); + dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; + for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); + lrItr != lrEnd; ++lrItr) { + dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::processHeader(LoopSplit &split) { + MachineBasicBlock &header = *split.getLoop().getHeader(); + //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; + + if (!lis->isLiveInToMBB(split.getLI(), &header)) + return; // Not live in, but nothing wrong so far. + + MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); + if (!preHeader) { + + if (!canInsertPreHeader(split.getLoop())) { + split.invalidate(); + return; // Couldn't insert a pre-header. Bail on this interval. 
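getLoopRanges, earlier in this hunk, sorts the loop blocks by layout order and then merges blocks that abut in the slot index space into maximal ranges. The merge step on integer pairs (a sketch; the input is assumed sorted by start, as the StartSlotComparator guarantees):

#include <utility>
#include <vector>

using SlotPair = std::pair<int, int>; // [start;end)

std::vector<SlotPair> coalesce(const std::vector<SlotPair> &Blocks) {
  std::vector<SlotPair> Out;
  for (const SlotPair &B : Blocks) {
    if (!Out.empty() && Out.back().second == B.first)
      Out.back().second = B.second; // Adjacent in layout: extend the range.
    else
      Out.push_back(B);             // Gap in the layout: start a new range.
  }
  return Out;
}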
+ } + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { + split.splitIncoming(); + break; + } + } + } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { + split.splitIncoming(); + } + } + + void LoopSplitter::processLoopExits(LoopSplit &split) { + typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; + ExitEdgesList exitEdges; + split.getLoop().getExitEdges(exitEdges); + + //dbgs() << " Processing loop exits:\n"; + + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge exitEdge = *exitEdgeItr; + + LiveRange *outRange = + split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); + + if (outRange != 0) { + if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { + split.invalidate(); + return; + } + + split.splitOutgoing(exitEdge); + } + } + } + + void LoopSplitter::processLoopUses(LoopSplit &split) { + std::set<MachineInstr*> processed; + + for (MachineRegisterInfo::reg_iterator + rItr = mri->reg_begin(split.getLI().reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + MachineInstr &instr = *rItr; + if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { + split.addLoopInstr(&instr); + processed.insert(&instr); + } + } + + //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg + // << " in blocks [ "; + //dbgs() << "]\n"; + } + + bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { + assert(TargetRegisterInfo::isVirtualRegister(li.reg) && + "Attempt to split physical register."); + + LoopSplit split(*this, li, loop); + processHeader(split); + if (split.isValid()) + processLoopExits(split); + if (split.isValid()) + processLoopUses(split); + if (split.isValid() /* && split.isWorthwhile() */) { + split.apply(); + DEBUG(dbgs() << "Success.\n"); + return true; + } + DEBUG(dbgs() << "Failed.\n"); + return false; + } + + void LoopSplitter::processInterval(LiveInterval &li) { + std::deque<MachineLoop*> loops; + std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); + + while (!loops.empty()) { + MachineLoop &loop = *loops.front(); + loops.pop_front(); + DEBUG( + dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" + << loop.getHeader()->getNumber() << " "; + ); + if (!splitOverLoop(li, loop)) { + // Couldn't split over outer loop, schedule sub-loops to be checked. + std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + } + } + } + + void LoopSplitter::processIntervals() { + while (!intervals.empty()) { + LiveInterval &li = *intervals.front(); + intervals.pop_front(); + + assert(!lis->intervalIsInOneMBB(li) && + "Single interval in process worklist."); + + processInterval(li); + } + } + +} diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h new file mode 100644 index 0000000000000..a726a7b834fbe --- /dev/null +++ b/lib/CodeGen/Splitter.h @@ -0,0 +1,99 @@ +//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPLITTER_H +#define LLVM_CODEGEN_SPLITTER_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" + +#include <deque> +#include <map> +#include <string> +#include <vector> + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + struct LiveRange; + class LoopSplit; + class MachineDominatorTree; + class MachineRegisterInfo; + class SlotIndexes; + class TargetInstrInfo; + class VNInfo; + + class LoopSplitter : public MachineFunctionPass { + friend class LoopSplit; + public: + static char ID; + + LoopSplitter() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + + private: + + MachineFunction *mf; + LiveIntervals *lis; + MachineLoopInfo *mli; + MachineRegisterInfo *mri; + MachineDominatorTree *mdt; + SlotIndexes *sis; + const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; + + std::string fqn; + std::deque<LiveInterval*> intervals; + + typedef std::pair<SlotIndex, SlotIndex> SlotPair; + typedef std::vector<SlotPair> LoopRanges; + typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap; + LoopRangeMap loopRangeMap; + + void dumpLoopInfo(MachineLoop &loop); + + void dumpOddTerminators(); + + void updateTerminators(MachineBasicBlock &mbb); + + bool canInsertPreHeader(MachineLoop &loop); + MachineBasicBlock& insertPreHeader(MachineLoop &loop); + + bool isCriticalEdge(MachineLoop::Edge &edge); + bool canSplitEdge(MachineLoop::Edge &edge); + MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); + + LoopRanges& getLoopRanges(MachineLoop &loop); + std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr, + MachineLoop &loop); + + void dumpLoopRanges(MachineLoop &loop); + + void processHeader(LoopSplit &split); + void processLoopExits(LoopSplit &split); + void processLoopUses(LoopSplit &split); + + bool splitOverLoop(LiveInterval &li, MachineLoop &loop); + + void processInterval(LiveInterval &li); + + void processIntervals(); + }; + +} + +#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index ca5c28ce010cb..9f51778da7562 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -62,17 +62,17 @@ namespace { bool RequiresStackProtector() const; public: static char ID; // Pass identification, replacement for typeid. 
- StackProtector() : FunctionPass(&ID), TLI(0) {} + StackProtector() : FunctionPass(ID), TLI(0) {} StackProtector(const TargetLowering *tli) - : FunctionPass(&ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) {} virtual bool runOnFunction(Function &Fn); }; } // end anonymous namespace char StackProtector::ID = 0; -static RegisterPass<StackProtector> -X("stack-protector", "Insert stack protectors"); +INITIALIZE_PASS(StackProtector, "stack-protector", + "Insert stack protectors", false, false); FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { return new StackProtector(tli); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index eff3c33e3daa0..8d57ae95dde2a 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -95,9 +95,9 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {} StackSlotColoring(bool RegColor) : - MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -119,7 +119,6 @@ namespace { private: void InitializeSlots(); - bool CheckForSetJmpCall(const MachineFunction &MF) const; void ScanForSpillSlotRefs(MachineFunction &MF); bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); @@ -146,8 +145,8 @@ namespace { char StackSlotColoring::ID = 0; -static RegisterPass<StackSlotColoring> -X("stack-slot-coloring", "Stack Slot Coloring"); +INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false); FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { return new StackSlotColoring(RegColor); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 59315cf67282b..894dbfa28bac7 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -39,7 +39,7 @@ using namespace llvm; namespace { struct StrongPHIElimination : public MachineFunctionPass { static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(&ID) {} + StrongPHIElimination() : MachineFunctionPass(ID) {} // Waiting stores, for each MBB, the set of copies that need to // be inserted into that MBB @@ -150,11 +150,10 @@ namespace { } char StrongPHIElimination::ID = 0; -static RegisterPass<StrongPHIElimination> -X("strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently"); +INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false); -const PassInfo *const llvm::StrongPHIEliminationID = &X; +char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; /// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree /// of the given MachineFunction. 
These numbers are then used in other parts diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 075db803bd231..a815b364d54e7 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -69,7 +69,7 @@ namespace { public: static char ID; explicit TailDuplicatePass(bool PreRA) : - MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} + MachineFunctionPass(ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Tail Duplication"; } @@ -254,14 +254,15 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { // SSA form. for (unsigned i = 0, e = Copies.size(); i != e; ++i) { MachineInstr *Copy = Copies[i]; - unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) { - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { - // Copy is the only use. Do trivial copy propagation here. - MRI->replaceRegWith(Dst, Src); - Copy->eraseFromParent(); - } + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index cdacb98e0e883..6e4a0d837ecd3 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -178,19 +178,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, return MF.CloneMachineInstr(Orig); } -unsigned -TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { - unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += GetInstSizeInBytes(I); - } - return FnSize; -} - // If the COPY instruction in MI can be folded to a stack operation, return // the register class to use. 
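Stepping back to the TailDuplication hunk above: with isMoveInstr gone, a copy is recognized by MI->isCopy() and its two fixed operands, and the cleanup rule is unchanged. If the COPY is the only user of its source register, every use of the destination can be rewritten to the source and the COPY erased. A toy, self-contained illustration of that rule, with hypothetical structures standing in for MachineRegisterInfo:

#include <cstdio>
#include <vector>

// Toy IR: each instruction defines one vreg and reads some others.
struct Inst { int Def; std::vector<int> Uses; };

int main() {
  // %6 = COPY %5, and %7 reads %6. Nothing else reads %5.
  std::vector<Inst> Prog = { {5, {}}, {6, {5}}, {7, {6}} };
  const int CopyIdx = 1, Dst = 6, Src = 5;

  int SrcUses = 0; // count readers of Src; the COPY must be the only one
  for (const Inst &I : Prog)
    for (int U : I.Uses)
      if (U == Src) ++SrcUses;

  if (SrcUses == 1) {
    for (Inst &I : Prog)                // replaceRegWith(Dst, Src): all
      for (int &U : I.Uses)             // readers of %6 now read %5...
        if (U == Dst) U = Src;
    Prog.erase(Prog.begin() + CopyIdx); // ...and the COPY disappears.
  }

  for (const Inst &I : Prog) {
    std::printf("%%%d =", I.Def);
    for (int U : I.Uses) std::printf(" %%%d", U);
    std::printf("\n");
  }
  return 0;
}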
static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a80cfc4b256fe..f1e10eec724c6 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -519,11 +519,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, ConstTextCoalSection = getContext().getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getContext().getMachOSection("__DATA","__const_coal", - MCSectionMachO::S_COALESCED, - SectionKind::getText()); + SectionKind::getReadOnly()); ConstDataSection // .const_data = getContext().getMachOSection("__DATA", "__const", 0, SectionKind::getReadOnlyWithRel()); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 564914373bb56..78989c567e42f 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -138,7 +138,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} + TwoAddressInstructionPass() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -159,10 +159,10 @@ namespace { } char TwoAddressInstructionPass::ID = 0; -static RegisterPass<TwoAddressInstructionPass> -X("twoaddressinstruction", "Two-Address instruction pass"); +INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false); -const PassInfo *const llvm::TwoAddressInstructionPassID = &X; +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; /// Sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it @@ -380,26 +380,18 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, bool &IsSrcPhys, bool &IsDstPhys) { SrcReg = 0; DstReg = 0; - unsigned SrcSubIdx, DstSubIdx; - if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.isCopy()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - } else if (MI.isInsertSubreg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - } else if (MI.isSubregToReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - } - } + if (MI.isCopy()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + } else + return false; - if (DstReg) { - IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - return true; - } - return false; + IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return true; } /// isKilled - Test if the given register value, which is used by the given @@ -1454,7 +1446,17 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { // // If the REG_SEQUENCE doesn't kill its source, keeping live variables // correctly up to date becomes very difficult. Insert a copy. - // + + // Defer any kill flag to the last operand using SrcReg. Otherwise, we + // might insert a COPY that uses SrcReg after it was killed.
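The added lines below implement the comment above. Before the COPY of SrcReg is inserted, a pending <kill> flag is handed to a later operand that still reads SrcReg, so the fresh COPY never reads a register that an earlier operand already claimed to kill. A reduced sketch with plain structs in place of MachineOperand:

#include <cstdio>
#include <vector>

struct Operand { int Reg; bool IsKill; };

int main() {
  const int SrcReg = 5;
  // Operands after the one being rewritten; %5 shows up once more.
  std::vector<Operand> Later = { {7, false}, {5, false} };

  bool isKill = true; // the operand being rewritten carried <kill> of %5
  if (isKill)
    for (unsigned j = 0; j < Later.size(); ++j)
      if (Later[j].Reg == SrcReg) {
        Later[j].IsKill = true; // setIsKill(): a later use now kills %5
        isKill = false;         // so the inserted COPY must not claim it
        break;
      }

  // If no later operand reads %5, isKill survives and the COPY keeps it.
  std::printf("COPY gets <kill>: %s\n", isKill ? "yes" : "no"); // "no"
  return 0;
}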
+ if (isKill) + for (unsigned j = i + 2; j < e; j += 2) + if (MI->getOperand(j).getReg() == SrcReg) { + MI->getOperand(j).setIsKill(); + isKill = false; + break; + } + MachineBasicBlock::iterator InsertLoc = MI; MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 7b338126d475f..6dd333358bc44 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -43,7 +43,7 @@ namespace { virtual bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - UnreachableBlockElim() : FunctionPass(&ID) {} + UnreachableBlockElim() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<ProfileInfo>(); @@ -51,8 +51,8 @@ namespace { }; } char UnreachableBlockElim::ID = 0; -static RegisterPass<UnreachableBlockElim> -X("unreachableblockelim", "Remove unreachable blocks from the CFG"); +INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", + "Remove unreachable blocks from the CFG", false, false); FunctionPass *llvm::createUnreachableBlockEliminationPass() { return new UnreachableBlockElim(); @@ -100,16 +100,15 @@ namespace { MachineModuleInfo *MMI; public: static char ID; // Pass identification, replacement for typeid - UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {} + UnreachableMachineBlockElim() : MachineFunctionPass(ID) {} }; } char UnreachableMachineBlockElim::ID = 0; -static RegisterPass<UnreachableMachineBlockElim> -Y("unreachable-mbb-elimination", - "Remove unreachable machine basic blocks"); +INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", + "Remove unreachable machine basic blocks", false, false); -const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y; +char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index ed0269695dfe0..20ffcffa70d35 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -48,8 +48,7 @@ STATISTIC(NumSpills , "Number of register spills"); char VirtRegMap::ID = 0; -static RegisterPass<VirtRegMap> -X("virtregmap", "Virtual Register Map"); +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false); bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index a5599f68b64e6..8b6082d181932 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -139,7 +139,7 @@ namespace llvm { public: static char ID; - VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG), + VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT), Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), @@ -152,6 +152,11 @@ namespace llvm { MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunction &getMachineFunction() const { + assert(MF && "getMachineFunction called before runOnMachineFunction"); + return *MF; + } + void grow(); /// @brief returns true if the specified virtual register is diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 57a1500e6e9da..240d28cf30114 100644 ---
a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -67,23 +67,16 @@ VirtRegRewriter::~VirtRegRewriter() {} /// Note that operands may be added, so the MO reference is no longer valid. static void substitutePhysReg(MachineOperand &MO, unsigned Reg, const TargetRegisterInfo &TRI) { - if (unsigned SubIdx = MO.getSubReg()) { - // Insert the physical subreg and reset the subreg field. - MO.setReg(TRI.getSubReg(Reg, SubIdx)); - MO.setSubReg(0); - - // Any def, dead, and kill flags apply to the full virtual register, so they - // also apply to the full physical register. Add imp-def/dead and imp-kill - // as needed. + if (MO.getSubReg()) { + MO.substPhysReg(Reg, TRI); + + // Any kill flags apply to the full virtual register, so they also apply to + // the full physical register. + // We assume that partial defs have already been decorated with a super-reg + // <imp-def> operand by LiveIntervals. MachineInstr &MI = *MO.getParent(); - if (MO.isDef()) - if (MO.isDead()) - MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true); - else - MI.addRegisterDefined(Reg, &TRI); - else if (!MO.isUndef() && - (MO.isKill() || - MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) + if (MO.isUse() && !MO.isUndef() && + (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); } else { MO.setReg(Reg); @@ -460,7 +453,7 @@ public: /// blocks each of which is a successor of the specified BB and has no other /// predecessor. static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl<MachineBasicBlock *> &Succs) { + SmallVectorImpl<MachineBasicBlock *> &Succs){ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; @@ -852,8 +845,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, // Yup, use the reload register that we didn't use before. unsigned NewReg = Op.AssignedPhysReg; Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected, - RegKills, KillOps, VRM); + return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); } else { // Otherwise, we might also have a problem if a previously reused // value aliases the new register. If so, codegen the previous reload @@ -1864,7 +1857,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) { /// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avid reloading vregs. +/// register allocator is done with them. If possible, avoid reloading vregs. void LocalRewriter::RewriteMBB(LiveIntervals *LIs, AvailableSpills &Spills, BitVector &RegKills, @@ -1914,7 +1907,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, if (InsertSpills(MII)) NextMII = llvm::next(MII); - VirtRegMap::MI2VirtMapTy::const_iterator I, End; bool Erased = false; bool BackTracked = false; MachineInstr &MI = *MII; @@ -2028,14 +2020,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, CanReuse = !ReusedOperands.isClobbered(PhysReg) && Spills.canClobberPhysReg(PhysReg); } - // If this is an asm, and PhysReg is used elsewhere as an earlyclobber - // operand, we can't also use it as an input. (Outputs always come - // before inputs, so we can stop looking at i.) + // If this is an asm, and a PhysReg alias is used elsewhere as an + // earlyclobber operand, we can't also use it as an input. 
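The check that follows is broader than the one it replaces in two ways: it scans every operand rather than only those before index i, and it compares with TRI->regsOverlap instead of plain register equality, so aliasing sub- and super-registers are caught too. A standalone sketch of such an overlap test, with made-up register codes and alias sets:

#include <cstdio>
#include <set>

// Hypothetical register file: each register covers a set of hardware
// units, and two registers overlap iff those sets intersect.
static const std::set<int> &units(int Reg) {
  static const std::set<int> AL  = {0};       // low byte
  static const std::set<int> AH  = {1};       // high byte
  static const std::set<int> EAX = {0, 1, 2}; // super-register of AL and AH
  static const std::set<int> EBX = {3};
  switch (Reg) {
  case 1:  return AL;
  case 2:  return AH;
  case 3:  return EAX;
  default: return EBX;
  }
}

static bool regsOverlap(int A, int B) {
  for (int U : units(A))
    if (units(B).count(U))
      return true;
  return false;
}

int main() {
  std::printf("AL vs EAX: %d\n", regsOverlap(1, 3)); // 1: EAX aliases AL
  std::printf("AL vs AH:  %d\n", regsOverlap(1, 2)); // 0: disjoint halves
  std::printf("AL vs EBX: %d\n", regsOverlap(1, 4)); // 0: unrelated regs
  return 0;
}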
if (MI.isInlineAsm()) { - for (unsigned k=0; k<i; ++k) { + for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) { + if (MOk.isReg() && MOk.isEarlyClobber() && + TRI->regsOverlap(MOk.getReg(), PhysReg)) { CanReuse = false; + DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg << ": " << MOk << '\n'); break; } } @@ -2248,15 +2242,22 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // If we have folded references to memory operands, make sure we clear all // physical registers that may contain the value of the spilled virtual // register + + // Copy the folded virts to a small vector, we may change MI2VirtMap. + SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts; + // C++0x FTW! + for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator, + VirtRegMap::MI2VirtMapTy::const_iterator> FVRange = + VRM->getFoldedVirts(&MI); + FVRange.first != FVRange.second; ++FVRange.first) + FoldedVirts.push_back(FVRange.first->second); + SmallSet<int, 2> FoldedSS; - for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) { - unsigned VirtReg = I->second.first; - VirtRegMap::ModRef MR = I->second.second; + for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { + unsigned VirtReg = FoldedVirts[FVI].first; + VirtRegMap::ModRef MR = FoldedVirts[FVI].second; DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); - // MI2VirtMap be can updated which invalidate the iterator. - // Increment the iterator first. - ++I; int SS = VRM->getStackSlot(VirtReg); if (SS == VirtRegMap::NO_STACK_SLOT) continue; @@ -2302,7 +2303,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); SmallVector<MachineInstr*, 4> NewMIs; if (PhysReg && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) { + TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ MBB->insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); VRM->RemoveMachineInstrFromMaps(&MI); @@ -2442,28 +2443,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, Spills.disallowClobberPhysReg(VirtReg); goto ProcessNextInst; } - unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && - Src == Dst && SrcSR == DstSR && - !MI.findRegisterUseOperand(Src)->isUndef()) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - SmallVector<unsigned, 2> KillRegs; - InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); - if (MO.isDead() && !KillRegs.empty()) { - // Source register or an implicit super/sub-register use is killed. - assert(KillRegs[0] == Dst || - TRI->isSubRegister(KillRegs[0], Dst) || - TRI->isSuperRegister(KillRegs[0], Dst)); - // Last def is now dead. - TransferDeadness(Src, RegKills, KillOps); - } - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); - Erased = true; - Spills.disallowClobberPhysReg(VirtReg); - goto ProcessNextInst; - } // If it's not a no-op copy, it clobbers the value in the destreg. 
Spills.ClobberPhysReg(VirtReg); @@ -2541,20 +2520,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, UpdateKills(*LastStore, TRI, RegKills, KillOps); goto ProcessNextInst; } - { - unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && - Src == Dst && SrcSR == DstSR) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); - Erased = true; - UpdateKills(*LastStore, TRI, RegKills, KillOps); - goto ProcessNextInst; - } - } } } ProcessNextInst: diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp index 5f30dce5855fd..0be80496a3cb7 100644 --- a/lib/CompilerDriver/Action.cpp +++ b/lib/CompilerDriver/Action.cpp @@ -13,6 +13,7 @@ #include "llvm/CompilerDriver/Action.h" #include "llvm/CompilerDriver/BuiltinOptions.h" +#include "llvm/CompilerDriver/Error.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SystemUtils.h" @@ -58,11 +59,15 @@ namespace { if (prog.isEmpty()) { prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main); - if (prog.isEmpty()) - throw std::runtime_error("Can't find program '" + name + "'"); + if (prog.isEmpty()) { + PrintError("Can't find program '" + name + "'"); + return -1; + } + } + if (!prog.canExecute()) { + PrintError("Program '" + name + "' is not executable."); + return -1; } - if (!prog.canExecute()) - throw std::runtime_error("Program '" + name + "' is not executable."); // Build the command line vector and the redirects array. const sys::Path* redirects[3] = {0,0,0}; diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp index d1ac8c98322c1..38442038d7385 100644 --- a/lib/CompilerDriver/BuiltinOptions.cpp +++ b/lib/CompilerDriver/BuiltinOptions.cpp @@ -19,7 +19,7 @@ namespace cl = llvm::cl; -// External linkage here is intentional. +namespace llvmc { cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"), cl::ZeroOrMore); @@ -57,3 +57,5 @@ cl::opt<SaveTempsEnum::Values> SaveTemps clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"), clEnumValEnd), cl::ValueOptional); + +} // End namespace llvmc. diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index 7d1c7fe4a62b9..d0c0e15bcdb7b 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -25,39 +25,46 @@ #include <iterator> #include <limits> #include <queue> -#include <stdexcept> using namespace llvm; using namespace llvmc; namespace llvmc { - const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { + const std::string* LanguageMap::GetLanguage(const sys::Path& File) const { StringRef suf = File.getSuffix(); LanguageMap::const_iterator Lang = this->find(suf.empty() ? "*empty*" : suf); - if (Lang == this->end()) - throw std::runtime_error("File '" + File.str() + - "' has unknown suffix '" + suf.str() + '\''); - return Lang->second; + if (Lang == this->end()) { + PrintError("File '" + File.str() + "' has unknown suffix '" + + suf.str() + '\''); + return 0; + } + return &Lang->second; } } namespace { - /// ChooseEdge - Return the edge with the maximum weight. + /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error. 
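The definition that follows is a max-by-weight scan with two wrinkles. A negative weight is now an error sentinel produced by TableGen-generated hooks, which is why the weights changed from unsigned to int, and a SingleMax flag rejects ties, since two equally attractive toolchain edges point to a broken specification (the added TODO admits the flag's bookkeeping is still imperfect). A self-contained sketch of the selection over plain ints, with the tie flag computed the intended way:

#include <cstdio>
#include <vector>

// Index of the unique maximum weight; -1 on a tie or empty input,
// -2 if any weight is negative (the error sentinel).
static int chooseEdge(const std::vector<int> &W) {
  int MaxIdx = -1, MaxW = 0;
  bool SingleMax = true;
  for (unsigned i = 0; i < W.size(); ++i) {
    if (W[i] < 0)
      return -2;
    if (W[i] > MaxW) {
      MaxIdx = (int)i;
      MaxW = W[i];
      SingleMax = true;  // a strictly larger weight clears old ties
    } else if (W[i] == MaxW && MaxW > 0) {
      SingleMax = false; // another edge matches the current maximum
    }
  }
  return SingleMax ? MaxIdx : -1;
}

int main() {
  std::vector<int> A = {1, 4, 2}, B = {3, 3}, C = {1, -1};
  std::printf("%d %d %d\n", chooseEdge(A), chooseEdge(B), chooseEdge(C));
  // prints: 1 -1 -2
  return 0;
}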
template <class C> const Edge* ChooseEdge(const C& EdgesContainer, const InputLanguagesSet& InLangs, const std::string& NodeName = "root") { const Edge* MaxEdge = 0; - unsigned MaxWeight = 0; + int MaxWeight = 0; bool SingleMax = true; + // TODO: fix calculation of SingleMax. for (typename C::const_iterator B = EdgesContainer.begin(), E = EdgesContainer.end(); B != E; ++B) { const Edge* e = B->getPtr(); - unsigned EW = e->Weight(InLangs); + int EW = e->Weight(InLangs); + if (EW < 0) { + // (error) invocation in TableGen -> we don't need to print an error + // message. + return 0; + } if (EW > MaxWeight) { MaxEdge = e; MaxWeight = EW; @@ -67,14 +74,16 @@ namespace { } } - if (!SingleMax) - throw std::runtime_error("Node " + NodeName + - ": multiple maximal outward edges found!" - " Most probably a specification error."); - if (!MaxEdge) - throw std::runtime_error("Node " + NodeName + - ": no maximal outward edge found!" - " Most probably a specification error."); + if (!SingleMax) { + PrintError("Node " + NodeName + ": multiple maximal outward edges found!" + " Most probably a specification error."); + return 0; + } + if (!MaxEdge) { + PrintError("Node " + NodeName + ": no maximal outward edge found!" + " Most probably a specification error."); + return 0; + } return MaxEdge; } @@ -98,29 +107,34 @@ CompilationGraph::CompilationGraph() { NodesMap["root"] = Node(this); } -Node& CompilationGraph::getNode(const std::string& ToolName) { +Node* CompilationGraph::getNode(const std::string& ToolName) { nodes_map_type::iterator I = NodesMap.find(ToolName); - if (I == NodesMap.end()) - throw std::runtime_error("Node " + ToolName + " is not in the graph"); - return I->second; + if (I == NodesMap.end()) { + PrintError("Node " + ToolName + " is not in the graph"); + return 0; + } + return &I->second; } -const Node& CompilationGraph::getNode(const std::string& ToolName) const { +const Node* CompilationGraph::getNode(const std::string& ToolName) const { nodes_map_type::const_iterator I = NodesMap.find(ToolName); - if (I == NodesMap.end()) - throw std::runtime_error("Node " + ToolName + " is not in the graph!"); - return I->second; + if (I == NodesMap.end()) { + PrintError("Node " + ToolName + " is not in the graph!"); + return 0; + } + return &I->second; } // Find the tools list corresponding to the given language name. 
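getNode above, and getToolsVector just below, follow the commit-wide migration away from exceptions: a failed lookup now calls PrintError and returns a null pointer instead of throwing std::runtime_error, and callers check for null and pass a non-zero result up the stack. A minimal sketch of the pattern, with a stand-in PrintError and a plain std::map in place of the graph:

#include <cstdio>
#include <map>
#include <string>

static void PrintError(const std::string &Msg) {    // stand-in for the helper
  std::fprintf(stderr, "error: %s\n", Msg.c_str()); // in CompilerDriver/Error.h
}

// Old style: const Node& getNode(Name) throws when the name is missing.
// New style: const Node* getNode(Name) reports and returns 0 (null).
static const int *getNode(const std::map<std::string, int> &M,
                          const std::string &Name) {
  std::map<std::string, int>::const_iterator I = M.find(Name);
  if (I == M.end()) {
    PrintError("Node " + Name + " is not in the graph");
    return 0;
  }
  return &I->second;
}

int main() {
  std::map<std::string, int> Nodes;
  Nodes["root"] = 42;
  if (const int *N = getNode(Nodes, "root"))
    std::printf("found %d\n", *N);
  if (!getNode(Nodes, "llc"))
    std::printf("caller handles the null and returns 1\n");
  return 0;
}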
-const CompilationGraph::tools_vector_type& +const CompilationGraph::tools_vector_type* CompilationGraph::getToolsVector(const std::string& LangName) const { tools_map_type::const_iterator I = ToolsMap.find(LangName); - if (I == ToolsMap.end()) - throw std::runtime_error("No tool corresponding to the language " - + LangName + " found"); - return I->second; + if (I == ToolsMap.end()) { + PrintError("No tool corresponding to the language " + LangName + " found"); + return 0; + } + return &I->second; } void CompilationGraph::insertNode(Tool* V) { @@ -128,29 +142,37 @@ void CompilationGraph::insertNode(Tool* V) { NodesMap[V->Name()] = Node(this, V); } -void CompilationGraph::insertEdge(const std::string& A, Edge* Edg) { - Node& B = getNode(Edg->ToolName()); +int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) { + Node* B = getNode(Edg->ToolName()); + if (B == 0) + return 1; + if (A == "root") { - const char** InLangs = B.ToolPtr->InputLanguages(); + const char** InLangs = B->ToolPtr->InputLanguages(); for (;*InLangs; ++InLangs) ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg)); NodesMap["root"].AddEdge(Edg); } else { - Node& N = getNode(A); - N.AddEdge(Edg); + Node* N = getNode(A); + if (N == 0) + return 1; + + N->AddEdge(Edg); } // Increase the inward edge counter. - B.IncrInEdges(); + B->IncrInEdges(); + + return 0; } // Pass input file through the chain until we bump into a Join node or // a node that says that it is the last. -void CompilationGraph::PassThroughGraph (const sys::Path& InFile, - const Node* StartNode, - const InputLanguagesSet& InLangs, - const sys::Path& TempDir, - const LanguageMap& LangMap) const { +int CompilationGraph::PassThroughGraph (const sys::Path& InFile, + const Node* StartNode, + const InputLanguagesSet& InLangs, + const sys::Path& TempDir, + const LanguageMap& LangMap) const { sys::Path In = InFile; const Node* CurNode = StartNode; @@ -158,25 +180,35 @@ void CompilationGraph::PassThroughGraph (const sys::Path& InFile, Tool* CurTool = CurNode->ToolPtr.getPtr(); if (CurTool->IsJoin()) { - JoinTool& JT = dynamic_cast<JoinTool&>(*CurTool); + JoinTool& JT = static_cast<JoinTool&>(*CurTool); JT.AddToJoinList(In); break; } - Action CurAction = CurTool->GenerateAction(In, CurNode->HasChildren(), - TempDir, InLangs, LangMap); + Action CurAction; + if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(), + TempDir, InLangs, LangMap)) { + return ret; + } if (int ret = CurAction.Execute()) - throw error_code(ret); + return ret; if (CurAction.StopCompilation()) - return; + return 0; + + const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name()); + if (Edg == 0) + return 1; + + CurNode = getNode(Edg->ToolName()); + if (CurNode == 0) + return 1; - CurNode = &getNode(ChooseEdge(CurNode->OutEdges, - InLangs, - CurNode->Name())->ToolName()); In = CurAction.OutFile(); } + + return 0; } // Find the head of the toolchain corresponding to the given file. @@ -186,26 +218,39 @@ FindToolChain(const sys::Path& In, const std::string* ForceLanguage, InputLanguagesSet& InLangs, const LanguageMap& LangMap) const { // Determine the input language. - const std::string& InLanguage = - ForceLanguage ? *ForceLanguage : LangMap.GetLanguage(In); + const std::string* InLang = LangMap.GetLanguage(In); + if (InLang == 0) + return 0; + const std::string& InLanguage = (ForceLanguage ? *ForceLanguage : *InLang); // Add the current input language to the input language set. InLangs.insert(InLanguage); // Find the toolchain for the input language. 
- const tools_vector_type& TV = getToolsVector(InLanguage); - if (TV.empty()) - throw std::runtime_error("No toolchain corresponding to language " - + InLanguage + " found"); - return &getNode(ChooseEdge(TV, InLangs)->ToolName()); + const tools_vector_type* pTV = getToolsVector(InLanguage); + if (pTV == 0) + return 0; + + const tools_vector_type& TV = *pTV; + if (TV.empty()) { + PrintError("No toolchain corresponding to language " + + InLanguage + " found"); + return 0; + } + + const Edge* Edg = ChooseEdge(TV, InLangs); + if (Edg == 0) + return 0; + + return getNode(Edg->ToolName()); } // Helper function used by Build(). // Traverses initial portions of the toolchains (up to the first Join node). // This function is also responsible for handling the -x option. -void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs, - const sys::Path& TempDir, - const LanguageMap& LangMap) { +int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs, + const sys::Path& TempDir, + const LanguageMap& LangMap) { // This is related to -x option handling. cl::list<std::string>::const_iterator xIter = Languages.begin(), xBegin = xIter, xEnd = Languages.end(); @@ -255,15 +300,25 @@ void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs, // Find the toolchain corresponding to this file. const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap); + if (N == 0) + return 1; // Pass file through the chain starting at head. - PassThroughGraph(In, N, InLangs, TempDir, LangMap); + if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap)) + return ret; } + + return 0; } // Sort the nodes in topological order. -void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) { +int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) { std::queue<const Node*> Q; - Q.push(&getNode("root")); + + Node* Root = getNode("root"); + if (Root == 0) + return 1; + + Q.push(Root); while (!Q.empty()) { const Node* A = Q.front(); @@ -271,12 +326,17 @@ void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) { Out.push_back(A); for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd(); EB != EE; ++EB) { - Node* B = &getNode((*EB)->ToolName()); + Node* B = getNode((*EB)->ToolName()); + if (B == 0) + return 1; + B->DecrInEdges(); if (B->HasNoInEdges()) Q.push(B); } } + + return 0; } namespace { @@ -287,49 +347,71 @@ namespace { // Call TopologicalSort and filter the resulting list to include // only Join nodes. -void CompilationGraph:: +int CompilationGraph:: TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) { std::vector<const Node*> TopSorted; - TopologicalSort(TopSorted); + if (int ret = TopologicalSort(TopSorted)) + return ret; std::remove_copy_if(TopSorted.begin(), TopSorted.end(), std::back_inserter(Out), NotJoinNode); + + return 0; } int CompilationGraph::Build (const sys::Path& TempDir, const LanguageMap& LangMap) { - InputLanguagesSet InLangs; + bool WasSomeActionGenerated = !InputFilenames.empty(); // Traverse initial parts of the toolchains and fill in InLangs. 
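Before the Build() hunk resumes below, a note on TopologicalSort above: it is Kahn's algorithm, and CheckCycles later in the file reuses the same loop, repeatedly retiring zero-in-degree nodes and declaring a cycle if any node is left standing. A compact standalone version over an adjacency list:

#include <cstdio>
#include <queue>
#include <vector>

int main() {
  // 0 -> 1, 0 -> 2, 1 -> 2 (a DAG; adding 2 -> 0 would create a cycle).
  std::vector<std::vector<int> > Adj = { {1, 2}, {2}, {} };
  std::vector<int> InDeg(Adj.size(), 0);
  for (unsigned a = 0; a < Adj.size(); ++a)
    for (unsigned j = 0; j < Adj[a].size(); ++j)
      ++InDeg[Adj[a][j]];

  std::queue<int> Q;
  for (unsigned n = 0; n < Adj.size(); ++n)
    if (InDeg[n] == 0)
      Q.push((int)n); // the graph's "root" nodes

  unsigned Visited = 0;
  while (!Q.empty()) {
    int A = Q.front(); Q.pop();
    ++Visited;
    std::printf("%d ", A); // emitted in topological order
    for (unsigned j = 0; j < Adj[A].size(); ++j)
      if (--InDeg[Adj[A][j]] == 0) // DecrInEdges() + HasNoInEdges()
        Q.push(Adj[A][j]);
  }
  std::printf("\ncycle: %s\n", Visited == Adj.size() ? "no" : "yes");
  return 0;
}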
- BuildInitial(InLangs, TempDir, LangMap); + if (int ret = BuildInitial(InLangs, TempDir, LangMap)) + return ret; std::vector<const Node*> JTV; - TopologicalSortFilterJoinNodes(JTV); + if (int ret = TopologicalSortFilterJoinNodes(JTV)) + return ret; // For all join nodes in topological order: for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end(); B != E; ++B) { const Node* CurNode = *B; - JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr()); + JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr()); // Are there any files in the join list? if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty())) continue; - Action CurAction = JT->GenerateAction(CurNode->HasChildren(), - TempDir, InLangs, LangMap); + WasSomeActionGenerated = true; + Action CurAction; + if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(), + TempDir, InLangs, LangMap)) { + return ret; + } if (int ret = CurAction.Execute()) - throw error_code(ret); + return ret; if (CurAction.StopCompilation()) return 0; - const Node* NextNode = &getNode(ChooseEdge(CurNode->OutEdges, InLangs, - CurNode->Name())->ToolName()); - PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode, - InLangs, TempDir, LangMap); + const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name()); + if (Edg == 0) + return 1; + + const Node* NextNode = getNode(Edg->ToolName()); + if (NextNode == 0) + return 1; + + if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode, + InLangs, TempDir, LangMap)) { + return ret; + } + } + + if (!WasSomeActionGenerated) { + PrintError("no input files"); + return 1; } return 0; @@ -337,6 +419,7 @@ int CompilationGraph::Build (const sys::Path& TempDir, int CompilationGraph::CheckLanguageNames() const { int ret = 0; + // Check that names for output and input languages on all edges do match. for (const_nodes_iterator B = this->NodesMap.begin(), E = this->NodesMap.end(); B != E; ++B) { @@ -345,9 +428,11 @@ int CompilationGraph::CheckLanguageNames() const { if (N1.ToolPtr) { for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd(); EB != EE; ++EB) { - const Node& N2 = this->getNode((*EB)->ToolName()); + const Node* N2 = this->getNode((*EB)->ToolName()); + if (N2 == 0) + return 1; - if (!N2.ToolPtr) { + if (!N2->ToolPtr) { ++ret; errs() << "Error: there is an edge from '" << N1.ToolPtr->Name() << "' back to the root!\n\n"; @@ -355,7 +440,7 @@ int CompilationGraph::CheckLanguageNames() const { } const char* OutLang = N1.ToolPtr->OutputLanguage(); - const char** InLangs = N2.ToolPtr->InputLanguages(); + const char** InLangs = N2->ToolPtr->InputLanguages(); bool eq = false; for (;*InLangs; ++InLangs) { if (std::strcmp(OutLang, *InLangs) == 0) { @@ -367,11 +452,11 @@ int CompilationGraph::CheckLanguageNames() const { if (!eq) { ++ret; errs() << "Error: Output->input language mismatch in the edge '" - << N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name() + << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name() << "'!\n" << "Expected one of { "; - InLangs = N2.ToolPtr->InputLanguages(); + InLangs = N2->ToolPtr->InputLanguages(); for (;*InLangs; ++InLangs) { errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'"); } @@ -395,7 +480,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const { for (const_nodes_iterator B = this->NodesMap.begin(), E = this->NodesMap.end(); B != E; ++B) { const Node& N = B->second; - unsigned MaxWeight = 0; + int MaxWeight = 0; // Ignore the root node. 
if (!N.ToolPtr) @@ -403,7 +488,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const { for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd(); EB != EE; ++EB) { - unsigned EdgeWeight = (*EB)->Weight(Dummy); + int EdgeWeight = (*EB)->Weight(Dummy); if (EdgeWeight > MaxWeight) { MaxWeight = EdgeWeight; } @@ -422,7 +507,12 @@ int CompilationGraph::CheckMultipleDefaultEdges() const { int CompilationGraph::CheckCycles() { unsigned deleted = 0; std::queue<Node*> Q; - Q.push(&getNode("root")); + + Node* Root = getNode("root"); + if (Root == 0) + return 1; + + Q.push(Root); // Try to delete all nodes that have no ingoing edges, starting from the // root. If there are any nodes left after this operation, then we have a @@ -434,7 +524,10 @@ int CompilationGraph::CheckCycles() { for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd(); EB != EE; ++EB) { - Node* B = &getNode((*EB)->ToolName()); + Node* B = getNode((*EB)->ToolName()); + if (B == 0) + return 1; + B->DecrInEdges(); if (B->HasNoInEdges()) Q.push(B); @@ -453,18 +546,28 @@ int CompilationGraph::CheckCycles() { int CompilationGraph::Check () { // We try to catch as many errors as we can in one go. + int errs = 0; int ret = 0; // Check that output/input language names match. - ret += this->CheckLanguageNames(); + ret = this->CheckLanguageNames(); + if (ret < 0) + return 1; + errs += ret; // Check for multiple default edges. - ret += this->CheckMultipleDefaultEdges(); + ret = this->CheckMultipleDefaultEdges(); + if (ret < 0) + return 1; + errs += ret; // Check for cycles. - ret += this->CheckCycles(); + ret = this->CheckCycles(); + if (ret < 0) + return 1; + errs += ret; - return ret; + return errs; } // Code related to graph visualization. @@ -516,7 +619,7 @@ namespace llvm { } -void CompilationGraph::writeGraph(const std::string& OutputFilename) { +int CompilationGraph::writeGraph(const std::string& OutputFilename) { std::string ErrorInfo; raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo); @@ -526,9 +629,11 @@ void CompilationGraph::writeGraph(const std::string& OutputFilename) { errs() << "done.\n"; } else { - throw std::runtime_error("Error opening file '" + OutputFilename - + "' for writing!"); + PrintError("Error opening file '" + OutputFilename + "' for writing!"); + return 1; } + + return 0; } void CompilationGraph::viewGraph() { diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp index b5e507dfc3a37..0a6613aa77a3b 100644 --- a/lib/CompilerDriver/Main.cpp +++ b/lib/CompilerDriver/Main.cpp @@ -11,16 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CompilerDriver/AutoGenerated.h" #include "llvm/CompilerDriver/BuiltinOptions.h" #include "llvm/CompilerDriver/CompilationGraph.h" #include "llvm/CompilerDriver/Error.h" -#include "llvm/CompilerDriver/Plugin.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" #include <sstream> -#include <stdexcept> #include <string> namespace cl = llvm::cl; @@ -31,9 +30,9 @@ namespace { std::stringstream* GlobalTimeLog; - sys::Path getTempDir() { - sys::Path tempDir; - + /// GetTempDir - Get the temporary directory location. Returns non-zero value + /// on error. + int GetTempDir(sys::Path& tempDir) { // The --temp-dir option. if (!TempDirname.empty()) { tempDir = TempDirname; @@ -41,7 +40,7 @@ namespace { // GCC 4.5-style -save-temps handling. 
else if (SaveTemps == SaveTempsEnum::Unset) { tempDir = sys::Path::GetTemporaryDirectory(); - return tempDir; + return 0; } else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) { tempDir = OutputFilename; @@ -49,35 +48,35 @@ namespace { } else { // SaveTemps == Cwd --> use current dir (leave tempDir empty). - return tempDir; + return 0; } if (!tempDir.exists()) { std::string ErrMsg; - if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) - throw std::runtime_error(ErrMsg); + if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) { + PrintError(ErrMsg); + return 1; + } } - return tempDir; + return 0; } - /// BuildTargets - A small wrapper for CompilationGraph::Build. + /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns + /// non-zero value in case of error. int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) { int ret; - const sys::Path& tempDir = getTempDir(); + sys::Path tempDir; bool toDelete = (SaveTemps == SaveTempsEnum::Unset); - try { - ret = graph.Build(tempDir, langMap); - } - catch(...) { - if (toDelete) - tempDir.eraseFromDisk(true); - throw; - } + if (int ret = GetTempDir(tempDir)) + return ret; + + ret = graph.Build(tempDir, langMap); if (toDelete) tempDir.eraseFromDisk(true); + return ret; } } @@ -89,68 +88,58 @@ void AppendToGlobalTimeLog(const std::string& cmd, double time) { *GlobalTimeLog << "# " << cmd << ' ' << time << '\n'; } -// Sometimes plugins want to condition on the value in argv[0]. +// Sometimes user code wants to access the argv[0] value. const char* ProgramName; int Main(int argc, char** argv) { - try { - LanguageMap langMap; - CompilationGraph graph; - - ProgramName = argv[0]; + int ret = 0; + LanguageMap langMap; + CompilationGraph graph; - cl::ParseCommandLineOptions - (argc, argv, "LLVM Compiler Driver (Work In Progress)", - /* ReadResponseFiles = */ false); + ProgramName = argv[0]; - PluginLoader Plugins; - Plugins.RunInitialization(langMap, graph); + cl::ParseCommandLineOptions + (argc, argv, + /* Overview = */ "LLVM Compiler Driver (Work In Progress)", + /* ReadResponseFiles = */ false); - if (CheckGraph) { - int ret = graph.Check(); - if (!ret) - llvm::errs() << "check-graph: no errors found.\n"; + if (int ret = autogenerated::RunInitialization(langMap, graph)) + return ret; - return ret; - } + if (CheckGraph) { + ret = graph.Check(); + if (!ret) + llvm::errs() << "check-graph: no errors found.\n"; - if (ViewGraph) { - graph.viewGraph(); - if (!WriteGraph) - return 0; - } + return ret; + } - if (WriteGraph) { - graph.writeGraph(OutputFilename.empty() - ? std::string("compilation-graph.dot") - : OutputFilename); + if (ViewGraph) { + graph.viewGraph(); + if (!WriteGraph) return 0; - } + } - if (Time) { - GlobalTimeLog = new std::stringstream; - GlobalTimeLog->precision(2); - } + if (WriteGraph) { + const std::string& Out = (OutputFilename.empty() + ? std::string("compilation-graph.dot") + : OutputFilename); + return graph.writeGraph(Out); + } - int ret = BuildTargets(graph, langMap); + if (Time) { + GlobalTimeLog = new std::stringstream; + GlobalTimeLog->precision(2); + } - if (Time) { - llvm::errs() << GlobalTimeLog->str(); - delete GlobalTimeLog; - } + ret = BuildTargets(graph, langMap); - return ret; - } - catch(llvmc::error_code& ec) { - return ec.code(); + if (Time) { + llvm::errs() << GlobalTimeLog->str(); + delete GlobalTimeLog; } - catch(const std::exception& ex) { - llvm::errs() << argv[0] << ": " << ex.what() << '\n'; - } - catch(...) 
{ - llvm::errs() << argv[0] << ": unknown error!\n"; - } - return 1; + + return ret; } } // end namespace llvmc diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile index 66c6d11552fc7..8e8b73ca8f83f 100644 --- a/lib/CompilerDriver/Makefile +++ b/lib/CompilerDriver/Makefile @@ -10,39 +10,11 @@ LEVEL = ../.. # We don't want this library to appear in `llvm-config --libs` output, so its -# name doesn't start with "LLVM". +# name doesn't start with "LLVM" and NO_LLVM_CONFIG is set. -ifeq ($(ENABLE_LLVMC_DYNAMIC),1) - LIBRARYNAME = libCompilerDriver - LLVMLIBS = LLVMSupport.a LLVMSystem.a - LOADABLE_MODULE := 1 -else - LIBRARYNAME = CompilerDriver - LINK_COMPONENTS = support system -endif +LIBRARYNAME = CompilerDriver +LINK_COMPONENTS = support system +NO_LLVM_CONFIG = 1 -REQUIRES_EH := 1 -REQUIRES_RTTI := 1 include $(LEVEL)/Makefile.common - -ifeq ($(ENABLE_LLVMC_DYNAMIC_PLUGINS), 1) - CPP.Flags += -DENABLE_LLVMC_DYNAMIC_PLUGINS -endif - -# Copy libCompilerDriver to the bin dir so that llvmc can find it. -ifeq ($(ENABLE_LLVMC_DYNAMIC),1) - -FullLibName = $(LIBRARYNAME)$(SHLIBEXT) - -all-local:: $(ToolDir)/$(FullLibName) - -$(ToolDir)/$(FullLibName): $(LibDir)/$(FullLibName) $(ToolDir)/.dir - $(Echo) Copying $(BuildMode) Shared Library $(FullLibName) to $@ - -$(Verb) $(CP) $< $@ - -clean-local:: - $(Echo) Removing $(BuildMode) Shared Library $(FullLibName) \ - from $(ToolDir) - -$(Verb) $(RM) -f $(ToolDir)/$(FullLibName) -endif diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp deleted file mode 100644 index 0fdfef4c6a29b..0000000000000 --- a/lib/CompilerDriver/Plugin.cpp +++ /dev/null @@ -1,78 +0,0 @@ -//===--- Plugin.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open -// Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Plugin support. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CompilerDriver/Plugin.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/System/Mutex.h" -#include <algorithm> -#include <vector> - -namespace { - - // Registry::Add<> does not do lifetime management (probably issues - // with static constructor/destructor ordering), so we have to - // implement it here. - // - // All this static registration/life-before-main model seems - // unnecessary convoluted to me. 
- - static bool pluginListInitialized = false; - typedef std::vector<const llvmc::BasePlugin*> PluginList; - static PluginList Plugins; - static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > PluginMutex; - - struct ByPriority { - bool operator()(const llvmc::BasePlugin* lhs, - const llvmc::BasePlugin* rhs) { - return lhs->Priority() < rhs->Priority(); - } - }; -} - -namespace llvmc { - - PluginLoader::PluginLoader() { - llvm::sys::SmartScopedLock<true> Lock(*PluginMutex); - if (!pluginListInitialized) { - for (PluginRegistry::iterator B = PluginRegistry::begin(), - E = PluginRegistry::end(); B != E; ++B) - Plugins.push_back(B->instantiate()); - std::sort(Plugins.begin(), Plugins.end(), ByPriority()); - } - pluginListInitialized = true; - } - - PluginLoader::~PluginLoader() { - llvm::sys::SmartScopedLock<true> Lock(*PluginMutex); - if (pluginListInitialized) { - for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); - B != E; ++B) - delete (*B); - } - pluginListInitialized = false; - } - - void PluginLoader::RunInitialization(LanguageMap& langMap, - CompilationGraph& graph) const - { - llvm::sys::SmartScopedLock<true> Lock(*PluginMutex); - for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); - B != E; ++B) { - const BasePlugin* BP = *B; - BP->PreprocessOptions(); - BP->PopulateLanguageMap(langMap); - BP->PopulateCompilationGraph(graph); - } - } - -} diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index c7495d442d9cf..f8f1f4a78ee53 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -236,6 +236,10 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, return 1; } +void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { + return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn)); +} + LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { return wrap(unwrap(EE)->getTargetData()); } diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp index b367033d32b56..274f816f39e11 100644 --- a/lib/ExecutionEngine/JIT/Intercept.cpp +++ b/lib/ExecutionEngine/JIT/Intercept.cpp @@ -89,6 +89,10 @@ static int jit_atexit(void (*Fn)()) { return 0; // Always successful } +static int jit_noop() { + return 0; +} + //===----------------------------------------------------------------------===// // /// getPointerToNamedFunction - This method returns the address of the specified @@ -104,6 +108,14 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, if (Name == "exit") return (void*)(intptr_t)&jit_exit; if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + const char *NameStr = Name.c_str(); // If this is an asm specifier, skip the sentinal. 
if (NameStr[0] == 1) ++NameStr; diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 67bd3ed10ad97..63125b79c8e26 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -67,7 +67,7 @@ extern "C" void LLVMLinkInJIT() { } -#if defined(__GNUC__) && !defined(__ARM__EABI__) +#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__) // libgcc defines the __register_frame function to dynamically register new // dwarf frames for exception handling. This functionality is not portable @@ -219,10 +219,8 @@ ExecutionEngine *JIT::createJIT(Module *M, StringRef MArch, StringRef MCPU, const SmallVectorImpl<std::string>& MAttrs) { - // Make sure we can resolve symbols in the program as well. The zero arg - // to the function tells DynamicLibrary to load the program, not a library. - if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) - return 0; + // Try to register the program as a source of symbols to resolve against. + sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); // Pick a target either via -march or by guessing the native arch. TargetMachine *TM = JIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr); @@ -308,7 +306,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, } // Register routine for informing unwinding runtime about new EH frames -#if defined(__GNUC__) && !defined(__ARM_EABI__) +#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__) #if USE_KEYMGR struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 749a57d92c948..6e11a3cd9368d 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -90,8 +90,8 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { // section. This allows GDB to get a good stack trace, particularly on // linux x86_64. Mark this as a PROGBITS section that needs to be loaded // into memory at runtime. - ELFSection &EH = EW.getSection(".eh_frame", ELFSection::SHT_PROGBITS, - ELFSection::SHF_ALLOC); + ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); // Pointers in the DWARF EH info are all relative to the EH frame start, // which is stored here. EH.Addr = (uint64_t)I.EhStart; @@ -102,9 +102,9 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { // Add this single function to the symbol table, so the debugger prints the // name instead of '???'. We give the symbol default global visibility. ELFSym *FnSym = ELFSym::getGV(F, - ELFSym::STB_GLOBAL, - ELFSym::STT_FUNC, - ELFSym::STV_DEFAULT); + ELF::STB_GLOBAL, + ELF::STT_FUNC, + ELF::STV_DEFAULT); FnSym->SectionIdx = Text.SectionIdx; FnSym->Size = I.FnEnd - I.FnStart; FnSym->Value = 0; // Offset from start of section. 
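Returning briefly to the Intercept.cpp change further up: getPointerToNamedFunction is consulted before any real symbol lookup, and the commit adds "__main" to its short interception table, bound to a do-nothing function, so MinGW/Cygwin startup hooks are not run a second time from JITed code. A standalone sketch of that layer, with a toy resolver in place of the JIT:

#include <cstdint>
#include <cstdio>
#include <cstring>

static int jit_noop() { return 0; } // stands in for the swallowed __main

// Resolve a few special names ahead of the normal lookup path.
static void *getPointerToNamedFunction(const char *Name) {
  // Generated main() on MinGW/Cygwin calls __main to run global ctors;
  // the host already ran its own, so hand back a harmless no-op instead.
  if (!std::strcmp(Name, "__main"))
    return (void *)(intptr_t)&jit_noop;
  return 0; // fall through to the dlsym-style search (omitted here)
}

int main() {
  typedef int (*FnPtr)();
  if (void *P = getPointerToNamedFunction("__main")) {
    FnPtr F = (FnPtr)(intptr_t)P;
    std::printf("__main intercepted; returns %d\n", F()); // prints 0
  }
  return 0;
}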
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 4b3ca8759b8ac..1105bcc0437f2 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -595,443 +595,3 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, return StartEHPtr; } - -unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F, - JITCodeEmitter& jce, - unsigned char* StartFunction, - unsigned char* EndFunction) { - const TargetMachine& TM = F.getTarget(); - TD = TM.getTargetData(); - stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection(); - RI = TM.getRegisterInfo(); - JCE = &jce; - unsigned FinalSize = 0; - - FinalSize += GetExceptionTableSizeInBytes(&F); - - const std::vector<const Function *> Personalities = MMI->getPersonalities(); - FinalSize += - GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]); - - FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()], - StartFunction); - - return FinalSize; -} - -/// RoundUpToAlign - Add the specified alignment to FinalSize and returns -/// the new value. -static unsigned RoundUpToAlign(unsigned FinalSize, unsigned Alignment) { - if (Alignment == 0) Alignment = 1; - // Since we do not know where the buffer will be allocated, be pessimistic. - return FinalSize + Alignment; -} - -unsigned -JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality, - unsigned char* StartFunction) const { - unsigned PointerSize = TD->getPointerSize(); - unsigned FinalSize = 0; - // EH frame header. - FinalSize += PointerSize; - // FDE CIE Offset - FinalSize += 3 * PointerSize; - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (Personality) { - FinalSize += MCAsmInfo::getULEB128Size(4); - FinalSize += PointerSize; - } else { - FinalSize += MCAsmInfo::getULEB128Size(0); - } - - // Indicate locations of function specific callee saved registers in - // frame. - FinalSize += GetFrameMovesSizeInBytes((intptr_t)StartFunction, - MMI->getFrameMoves()); - - FinalSize = RoundUpToAlign(FinalSize, 4); - - // Double zeroes for the unwind runtime - FinalSize += 2 * PointerSize; - - return FinalSize; -} - -unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personality) - const { - - unsigned PointerSize = TD->getPointerSize(); - int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ? - PointerSize : -PointerSize; - unsigned FinalSize = 0; - // EH Common Frame header - FinalSize += PointerSize; - FinalSize += 4; - FinalSize += 1; - FinalSize += Personality ? 
5 : 3; // "zPLR" or "zR" - FinalSize += MCAsmInfo::getULEB128Size(1); - FinalSize += MCAsmInfo::getSLEB128Size(stackGrowth); - FinalSize += 1; - - if (Personality) { - FinalSize += MCAsmInfo::getULEB128Size(7); - - // Encoding - FinalSize+= 1; - //Personality - FinalSize += PointerSize; - - FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel); - FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel); - - } else { - FinalSize += MCAsmInfo::getULEB128Size(1); - FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel); - } - - std::vector<MachineMove> Moves; - RI->getInitialFrameState(Moves); - FinalSize += GetFrameMovesSizeInBytes(0, Moves); - FinalSize = RoundUpToAlign(FinalSize, 4); - return FinalSize; -} - -unsigned -JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr, - const std::vector<MachineMove> &Moves) const { - unsigned PointerSize = TD->getPointerSize(); - int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ? - PointerSize : -PointerSize; - bool IsLocal = BaseLabelPtr; - unsigned FinalSize = 0; - - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - - // Throw out move if the label is invalid. - if (Label && (*JCE->getLabelLocations())[Label] == 0) - continue; - - intptr_t LabelPtr = 0; - if (Label) LabelPtr = JCE->getLabelAddress(Label); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // Advance row if new location. - if (BaseLabelPtr && Label && (BaseLabelPtr != LabelPtr || !IsLocal)) { - FinalSize++; - FinalSize += PointerSize; - BaseLabelPtr = LabelPtr; - IsLocal = true; - } - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (!Src.isReg()) { - if (Src.getReg() == MachineLocation::VirtualFP) { - ++FinalSize; - } else { - ++FinalSize; - unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true); - FinalSize += MCAsmInfo::getULEB128Size(RegNum); - } - - int Offset = -Src.getOffset(); - - FinalSize += MCAsmInfo::getULEB128Size(Offset); - } else { - llvm_unreachable("Machine move no supported yet."); - } - } else if (Src.isReg() && - Src.getReg() == MachineLocation::VirtualFP) { - if (Dst.isReg()) { - ++FinalSize; - unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true); - FinalSize += MCAsmInfo::getULEB128Size(RegNum); - } else { - llvm_unreachable("Machine move no supported yet."); - } - } else { - unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); - int Offset = Dst.getOffset() / stackGrowth; - - if (Offset < 0) { - ++FinalSize; - FinalSize += MCAsmInfo::getULEB128Size(Reg); - FinalSize += MCAsmInfo::getSLEB128Size(Offset); - } else if (Reg < 64) { - ++FinalSize; - FinalSize += MCAsmInfo::getULEB128Size(Offset); - } else { - ++FinalSize; - FinalSize += MCAsmInfo::getULEB128Size(Reg); - FinalSize += MCAsmInfo::getULEB128Size(Offset); - } - } - } - return FinalSize; -} - -unsigned -JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const { - unsigned FinalSize = 0; - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(JCE->getLabelLocations()); - - const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - if (PadInfos.empty()) return 0; - - // Sort the landing pads in order of their type ids. This is used to fold - // duplicate actions. 
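A note on the arithmetic that dominates this removed estimator: it leans on MCAsmInfo::getULEB128Size and getSLEB128Size, which predict how many bytes a value occupies in LEB128 form at seven payload bits per byte. A small standalone version of both computations, assuming an arithmetic right shift for the signed case:

#include <cstdio>

// Bytes needed for an unsigned LEB128 value: 7 payload bits per byte.
static unsigned getULEB128Size(unsigned long long V) {
  unsigned Size = 0;
  do { V >>= 7; ++Size; } while (V);
  return Size;
}

// Signed LEB128 stops once the rest of the value is pure sign extension.
static unsigned getSLEB128Size(long long V) {
  unsigned Size = 0;
  bool More = true;
  while (More) {
    unsigned char Byte = V & 0x7f;
    V >>= 7; // assumes arithmetic shift for negative values
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    ++Size;
  }
  return Size;
}

int main() {
  std::printf("%u %u %u\n", getULEB128Size(0), getULEB128Size(127),
              getULEB128Size(128));                             // 1 1 2
  std::printf("%u %u\n", getSLEB128Size(-1), getSLEB128Size(64)); // 1 2
  return 0;
}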
- SmallVector<const LandingPadInfo *, 64> LandingPads; - LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); - - // Negative type ids index into FilterIds, positive type ids index into - // TypeInfos. The value written for a positive type id is just the type - // id itself. For a negative type id, however, the value written is the - // (negative) byte offset of the corresponding FilterIds entry. The byte - // offset is usually equal to the type id, because the FilterIds entries - // are written using a variable width encoding which outputs one byte per - // entry as long as the value written is not too large, but can differ. - // This kind of complication does not occur for positive type ids because - // type infos are output using a fixed width encoding. - // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i]. - SmallVector<int, 16> FilterOffsets; - FilterOffsets.reserve(FilterIds.size()); - int Offset = -1; - for(std::vector<unsigned>::const_iterator I = FilterIds.begin(), - E = FilterIds.end(); I != E; ++I) { - FilterOffsets.push_back(Offset); - Offset -= MCAsmInfo::getULEB128Size(*I); - } - - // Compute the actions table and gather the first action index for each - // landing pad site. - SmallVector<ActionEntry, 32> Actions; - SmallVector<unsigned, 64> FirstActions; - FirstActions.reserve(LandingPads.size()); - - int FirstAction = 0; - unsigned SizeActions = 0; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LP = LandingPads[i]; - const std::vector<int> &TypeIds = LP->TypeIds; - const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; - unsigned SizeSiteActions = 0; - - if (NumShared < TypeIds.size()) { - unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; - - if (NumShared) { - const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); - assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - for (unsigned j = NumShared; j != SizePrevIds; ++j) { - SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; - } - } - - // Compute the actions. - for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { - int TypeID = TypeIds[I]; - assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); - - int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); - SizeSiteActions += SizeAction; - - ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; - Actions.push_back(Action); - - PrevAction = &Actions.back(); - } - - // Record the first action of the landing pad site. - FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; - } // else identical - re-use previous FirstAction - - FirstActions.push_back(FirstAction); - - // Compute this sites contribution to size. - SizeActions += SizeSiteActions; - } - - // Compute the call-site table. Entries must be ordered by address. 
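The removed loop below builds that call-site table and folds adjacent entries: when a new try-range carries the same landing pad and action as its predecessor, the predecessor's end label is simply extended. A toy sketch of the merge, with integers standing in for labels:

#include <cstdio>
#include <vector>

struct Site { int Begin, End, Pad, Action; };

int main() {
  // Try-ranges already in address order.
  std::vector<Site> In = { {0, 4, 100, 1}, {4, 9, 100, 1}, {9, 12, 200, 2} };
  std::vector<Site> Out;
  for (const Site &S : In) {
    if (!Out.empty() && Out.back().Pad == S.Pad &&
        Out.back().Action == S.Action)
      Out.back().End = S.End; // extend the previous entry's range
    else
      Out.push_back(S);       // otherwise start a new call-site entry
  }
  for (const Site &S : Out)
    std::printf("[%d,%d) pad=%d action=%d\n", S.Begin, S.End, S.Pad, S.Action);
  // prints: [0,9) pad=100 action=1 and [9,12) pad=200 action=2
  return 0;
}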
- SmallVector<CallSiteEntry, 64> CallSites; - - RangeMapType PadMap; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } - } - - bool MayThrow = false; - MCSymbol *LastLabel = 0; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); - MI != E; ++MI) { - if (!MI->isLabel()) { - MayThrow |= MI->getDesc().isCall(); - continue; - } - - MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol(); - - if (BeginLabel == LastLabel) - MayThrow = false; - - RangeMapType::iterator L = PadMap.find(BeginLabel); - - if (L == PadMap.end()) - continue; - - PadRange P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // If some instruction between the previous try-range and this one may - // throw, create a call-site entry with no landing pad for the region - // between the try-ranges. - if (MayThrow) { - CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; - CallSites.push_back(Site); - } - - LastLabel = LandingPad->EndLabels[P.RangeIndex]; - CallSiteEntry Site = {BeginLabel, LastLabel, - LandingPad->LandingPadLabel, FirstActions[P.PadIndex]}; - - assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel && - "Invalid landing pad!"); - - // Try to merge with the previous call-site. - if (CallSites.size()) { - CallSiteEntry &Prev = CallSites.back(); - if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { - // Extend the range of the previous entry. - Prev.EndLabel = Site.EndLabel; - continue; - } - } - - // Otherwise, create a new call-site. - CallSites.push_back(Site); - } - } - // If some instruction between the previous try-range and the end of the - // function may throw, create a call-site entry with no landing pad for the - // region following the try-range. - if (MayThrow) { - CallSiteEntry Site = {LastLabel, 0, 0, 0}; - CallSites.push_back(Site); - } - - // Final tallies. - unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start. - sizeof(int32_t) + // Site length. - sizeof(int32_t)); // Landing pad. - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); - - unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); - - unsigned TypeOffset = sizeof(int8_t) + // Call site format - // Call-site table length - MCAsmInfo::getULEB128Size(SizeSites) + - SizeSites + SizeActions + SizeTypes; - - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - MCAsmInfo::getULEB128Size(TypeOffset) + // TType base offset - TypeOffset; - - unsigned SizeAlign = (4 - TotalSize) & 3; - - // Begin the exception table. - FinalSize = RoundUpToAlign(FinalSize, 4); - for (unsigned i = 0; i != SizeAlign; ++i) { - ++FinalSize; - } - - unsigned PointerSize = TD->getPointerSize(); - - // Emit the header. 
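The quantities just tallied (SizeSites, SizeActions, SizeTypes, TypeOffset) correspond one-for-one to the usual GCC-style LSDA shape. As a sketch of that layout, with the field encodings taken from the counting comments in the emission code below:

//   u8        LPStart encoding        (DW_EH_PE_omit here)
//   u8        TType encoding          (DW_EH_PE_absptr here)
//   uleb128   TType base offset       (= TypeOffset above)
//   u8        call-site encoding      (DW_EH_PE_udata4 here)
//   uleb128   call-site table length  (= SizeSites above)
//   call sites: { start, length, landing pad, uleb128 action } per entry
//   actions:    { sleb128 type index, sleb128 next action } per entry
//   type infos: one pointer each, emitted in reverse order
//   filter ids: one uleb128 each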
- ++FinalSize; - // Asm->EOL("LPStart format (DW_EH_PE_omit)"); - ++FinalSize; - // Asm->EOL("TType format (DW_EH_PE_absptr)"); - ++FinalSize; - // Asm->EOL("TType base offset"); - ++FinalSize; - // Asm->EOL("Call site format (DW_EH_PE_udata4)"); - ++FinalSize; - // Asm->EOL("Call-site table length"); - - // Emit the landing pad site information. - for (unsigned i = 0; i < CallSites.size(); ++i) { - CallSiteEntry &S = CallSites[i]; - - // Asm->EOL("Region start"); - FinalSize += PointerSize; - - //Asm->EOL("Region length"); - FinalSize += PointerSize; - - // Asm->EOL("Landing pad"); - FinalSize += PointerSize; - - FinalSize += MCAsmInfo::getULEB128Size(S.Action); - // Asm->EOL("Action"); - } - - // Emit the actions. - for (unsigned I = 0, N = Actions.size(); I != N; ++I) { - ActionEntry &Action = Actions[I]; - - //Asm->EOL("TypeInfo index"); - FinalSize += MCAsmInfo::getSLEB128Size(Action.ValueForTypeID); - //Asm->EOL("Next action"); - FinalSize += MCAsmInfo::getSLEB128Size(Action.NextAction); - } - - // Emit the type ids. - for (unsigned M = TypeInfos.size(); M; --M) { - // Asm->EOL("TypeInfo"); - FinalSize += PointerSize; - } - - // Emit the filter typeids. - for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { - unsigned TypeID = FilterIds[j]; - FinalSize += MCAsmInfo::getULEB128Size(TypeID); - //Asm->EOL("Filter TypeInfo index"); - } - - FinalSize = RoundUpToAlign(FinalSize, 4); - - return FinalSize; -} diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h index e627550d6d0e6..30956820f357e 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -49,17 +49,6 @@ class JITDwarfEmitter { unsigned char* EndFunction, unsigned char* ExceptionTable) const; - unsigned GetExceptionTableSizeInBytes(MachineFunction* MF) const; - - unsigned - GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr, - const std::vector<MachineMove> &Moves) const; - - unsigned GetCommonEHFrameSizeInBytes(const Function* Personality) const; - - unsigned GetEHFrameSizeInBytes(const Function* Personality, - unsigned char* StartFunction) const; - public: JITDwarfEmitter(JIT& jit); @@ -71,11 +60,6 @@ public: unsigned char* &EHFramePtr); - unsigned GetDwarfTableSizeInBytes(MachineFunction& F, - JITCodeEmitter& JCE, - unsigned char* StartFunction, - unsigned char* EndFunction); - void setModuleInfo(MachineModuleInfo* Info) { MMI = Info; } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 28d79daed350b..4c0d0789cced4 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -152,16 +152,6 @@ namespace { FunctionToCallSitesMap[F].insert(CallSite); } - // Returns the Function of the stub if a stub was erased, or NULL if there - // was no stub. This function uses the call-site->function map to find a - // relevant function, but asserts that only stubs and not other call sites - // will be passed in. - Function *EraseStub(const MutexGuard &locked, void *Stub); - - void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) { - assert(locked.holds(TheJIT->lock)); - EraseAllCallSitesForPrelocked(F); - } void EraseAllCallSitesForPrelocked(Function *F); // Erases _all_ call sites regardless of their function. This is used to @@ -223,9 +213,6 @@ namespace { /// specified GV address. 
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); - void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, - SmallVectorImpl<void*> &Ptrs); - /// getGOTIndexForAddress - Return a new or existing index in the GOT for /// an address. This function only manages slots, it does not manage the /// contents of the slots or the memory associated with the GOT. @@ -398,7 +385,6 @@ namespace { /// classof - Methods for support type inquiry through isa, cast, and /// dyn_cast: /// - static inline bool classof(const JITEmitter*) { return true; } static inline bool classof(const MachineCodeEmitter*) { return true; } JITResolver &getJITResolver() { return Resolver; } @@ -480,26 +466,10 @@ namespace { if (DE.get()) DE->setModuleInfo(Info); } - void setMemoryExecutable() { - MemMgr->setMemoryExecutable(); - } - - JITMemoryManager *getMemMgr() const { return MemMgr; } - private: void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool MayNeedFarStub); void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); - unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size); - unsigned addSizeOfGlobalsInConstantVal( - const Constant *C, unsigned Size, - SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, - SmallVectorImpl<const GlobalVariable*> &Worklist); - unsigned addSizeOfGlobalsInInitializer( - const Constant *Init, unsigned Size, - SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, - SmallVectorImpl<const GlobalVariable*> &Worklist); - unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF); }; } @@ -507,39 +477,6 @@ void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { JRS->EraseAllCallSitesForPrelocked(F); } -Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) { - CallSiteToFunctionMapTy::iterator C2F_I = - CallSiteToFunctionMap.find(Stub); - if (C2F_I == CallSiteToFunctionMap.end()) { - // Not a stub. - return NULL; - } - - StubToResolverMap->UnregisterStubResolver(Stub); - - Function *const F = C2F_I->second; -#ifndef NDEBUG - void *RealStub = FunctionToLazyStubMap.lookup(F); - assert(RealStub == Stub && - "Call-site that wasn't a stub passed in to EraseStub"); -#endif - FunctionToLazyStubMap.erase(F); - CallSiteToFunctionMap.erase(C2F_I); - - // Remove the stub from the function->call-sites map, and remove the whole - // entry from the map if that was the last call site. 
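The resolver keeps the call-site and function maps as mutual inverses, which is why erasing a stub (below) must update both sides and drop the per-function set once it empties. A reduced sketch of that pattern, with hypothetical container names (Function standing in for llvm::Function):

#include <map>
#include <set>

std::map<void*, Function*> SiteToFn;             // call site -> callee
std::map<Function*, std::set<void*> > FnToSites; // callee -> its call sites

void eraseSite(void *Site) {
  Function *F = SiteToFn[Site];
  SiteToFn.erase(Site);
  FnToSites[F].erase(Site);
  if (FnToSites[F].empty())  // drop the entry once its last site is gone
    FnToSites.erase(F);
}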
- FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); - assert(F2C_I != FunctionToCallSitesMap.end() && - "FunctionToCallSitesMap broken"); - bool Erased = F2C_I->second.erase(Stub); - (void)Erased; - assert(Erased && "FunctionToCallSitesMap broken"); - if (F2C_I->second.empty()) - FunctionToCallSitesMap.erase(F2C_I); - - return F; -} - void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); if (F2C == FunctionToCallSitesMap.end()) @@ -690,28 +627,6 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) { return idx; } -void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, - SmallVectorImpl<void*> &Ptrs) { - MutexGuard locked(TheJIT->lock); - - const FunctionToLazyStubMapTy &FM = state.getFunctionToLazyStubMap(locked); - GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - - for (FunctionToLazyStubMapTy::const_iterator i = FM.begin(), e = FM.end(); - i != e; ++i){ - Function *F = i->first; - if (F->isDeclaration() && F->hasExternalLinkage()) { - GVs.push_back(i->first); - Ptrs.push_back(i->second); - } - } - for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); - i != e; ++i) { - GVs.push_back(i->first); - Ptrs.push_back(i->second); - } -} - /// JITCompilerFn - This function is called when a lazy compilation stub has /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. @@ -831,7 +746,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (DL.isUnknown()) return; if (!BeforePrintingInsn) return; - const LLVMContext& Context = EmissionDetails.MF->getFunction()->getContext(); + const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); if (DL.getScope(Context) != 0 && PrevDL != DL) { JITEvent_EmittedFunctionDetails::LineStart NextLine; @@ -859,184 +774,6 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, return Size; } -static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) { - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - if (JT.empty()) return 0; - - unsigned NumEntries = 0; - for (unsigned i = 0, e = JT.size(); i != e; ++i) - NumEntries += JT[i].MBBs.size(); - - return NumEntries * MJTI->getEntrySize(*jit->getTargetData()); -} - -static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) { - if (Alignment == 0) Alignment = 1; - // Since we do not know where the buffer will be allocated, be pessimistic. - return Size + Alignment; -} - -/// addSizeOfGlobal - add the size of the global (plus any alignment padding) -/// into the running total Size. - -unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { - const Type *ElTy = GV->getType()->getElementType(); - size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy); - size_t GVAlign = - (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV); - DEBUG(dbgs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign); - DEBUG(GV->dump()); - // Assume code section ends with worst possible alignment, so first - // variable needs maximal padding. 
- if (Size==0)
- Size = 1;
- Size = ((Size+GVAlign-1)/GVAlign)*GVAlign;
- Size += GVSize;
- return Size;
-}
-
-/// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
-/// but are referenced from the constant; put them in SeenGlobals and the
-/// Worklist, and add their size into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(
- const Constant *C,
- unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist) {
- // If it's undefined, return the garbage.
- if (isa<UndefValue>(C))
- return Size;
-
- // If the value is a ConstantExpr, recurse into its operands.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- Constant *Op0 = CE->getOperand(0);
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
- break;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
- Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size,
- SeenGlobals, Worklist);
- break;
- }
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "ConstantExpr not handled: " << *CE;
- report_fatal_error(Msg.str());
- }
- }
- }
-
- if (C->getType()->getTypeID() == Type::PointerTyID)
- if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
- if (SeenGlobals.insert(GV)) {
- Worklist.push_back(GV);
- Size = addSizeOfGlobal(GV, Size);
- }
-
- return Size;
-}
-
-/// addSizeOfGlobalsInInitializer - handle any globals that we haven't seen yet
-/// but are referenced from the given initializer.
-
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(
- const Constant *Init,
- unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist) {
- if (!isa<UndefValue>(Init) &&
- !isa<ConstantVector>(Init) &&
- !isa<ConstantAggregateZero>(Init) &&
- !isa<ConstantArray>(Init) &&
- !isa<ConstantStruct>(Init) &&
- Init->getType()->isFirstClassType())
- Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist);
- return Size;
-}
-
-/// GetSizeOfGlobalsInBytes - walk the code for the function, looking for
-/// globals; then walk the initializers of those globals looking for more.
-/// If their size has not been considered yet, add it into the running total
-/// Size.
-
-unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
- unsigned Size = 0;
- SmallPtrSet<const GlobalVariable*, 8> SeenGlobals;
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const TargetInstrDesc &Desc = I->getDesc();
- const MachineInstr &MI = *I;
- unsigned NumOps = Desc.getNumOperands();
- for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) {
- const MachineOperand &MO = MI.getOperand(CurOp);
- if (MO.isGlobal()) {
- const GlobalValue* V = MO.getGlobal();
- const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V);
- if (!GV)
- continue;
- // If seen in previous function, it will have an entry here.
- if (TheJIT->getPointerToGlobalIfAvailable(
- const_cast<GlobalVariable *>(GV)))
- continue;
- // If seen earlier in this function, it will have an entry here.
- // FIXME: it should be possible to combine these tables, by
- // assuming the addresses of the new globals in this module
- // start at 0 (or something) and adjusting them after codegen
- // is complete. Another possibility is to grab a marker bit in GV.
- if (SeenGlobals.insert(GV))
- // A variable as yet unseen. Add in its size.
- Size = addSizeOfGlobal(GV, Size);
- }
- }
- }
- }
- DEBUG(dbgs() << "JIT: About to look through initializers\n");
- // Look for more globals that are referenced only from initializers.
- SmallVector<const GlobalVariable*, 8> Worklist(
- SeenGlobals.begin(), SeenGlobals.end());
- while (!Worklist.empty()) {
- const GlobalVariable* GV = Worklist.back();
- Worklist.pop_back();
- if (GV->hasInitializer())
- Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size,
- SeenGlobals, Worklist);
- }
-
- return Size;
-}
-
 void JITEmitter::startFunction(MachineFunction &F) {
 DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
 << F.getFunction()->getName() << "\n");
@@ -1044,43 +781,8 @@ void JITEmitter::startFunction(MachineFunction &F) {
 uintptr_t ActualSize = 0;
 // Set the memory writable, if it's not already
 MemMgr->setMemoryWritable();
- if (MemMgr->NeedsExactSize()) {
- DEBUG(dbgs() << "JIT: ExactSize\n");
- const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
- MachineConstantPool *MCP = F.getConstantPool();
-
- // Ensure the constant pool/jump table info is at least 4-byte aligned.
- ActualSize = RoundUpToAlign(ActualSize, 16);
-
- // Add the alignment of the constant pool
- ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
-
- // Add the constant pool size
- ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
-
- if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) {
- // Add the alignment of the jump table info
- ActualSize = RoundUpToAlign(ActualSize,
- MJTI->getEntryAlignment(*TheJIT->getTargetData()));
-
- // Add the jump table size
- ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT);
- }
-
- // Add the alignment for the function
- ActualSize = RoundUpToAlign(ActualSize,
- std::max(F.getFunction()->getAlignment(), 8U));
-
- // Add the function size
- ActualSize += TII->GetFunctionSizeInBytes(F);
-
- DEBUG(dbgs() << "JIT: ActualSize before globals " << ActualSize << "\n");
- // Add the size of the globals that will be allocated after this function.
- // These are all the ones referenced from this function that were not
- // previously allocated.
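One detail worth noting about the estimate above: the RoundUpToAlign it uses (the static helper removed earlier in this file) is deliberately pessimistic. Since the buffer's eventual address is unknown, it adds the full alignment rather than rounding, so RoundUpToAlign(13, 16) yields 29, an upper bound rather than a true round-up to 16. The whole NeedsExactSize path being removed here was therefore a worst-case size bound, not an exact size.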
- ActualSize += GetSizeOfGlobalsInBytes(F); - DEBUG(dbgs() << "JIT: ActualSize after globals " << ActualSize << "\n"); - } else if (SizeEstimate > 0) { + + if (SizeEstimate > 0) { // SizeEstimate will be non-zero on reallocation attempts. ActualSize = SizeEstimate; } @@ -1268,9 +970,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) { SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - if (MemMgr->NeedsExactSize()) - ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); - BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), ActualSize); BufferEnd = BufferBegin+ActualSize; diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 8487c83ce36ab..7e8245a9e3a6b 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/ADT/DenseMap.h" using namespace llvm; @@ -96,15 +97,6 @@ public: return 0; } - /// erase - Remove the specified type, returning true if it was in the set. - bool erase(const Type *Ty) { - if (!TheMap.erase(Ty)) - return false; - if (Ty->isAbstract()) - Ty->removeAbstractTypeUser(this); - return true; - } - /// insert - This returns true if the pointer was new to the set, false if it /// was already in the set. bool insert(const Type *Src, const Type *Dst) { @@ -334,97 +326,6 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { return false; } -#ifndef NDEBUG -static void PrintMap(const std::map<const Value*, Value*> &M) { - for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end(); - I != E; ++I) { - dbgs() << " Fr: " << (void*)I->first << " "; - I->first->dump(); - dbgs() << " To: " << (void*)I->second << " "; - I->second->dump(); - dbgs() << "\n"; - } -} -#endif - - -// RemapOperand - Use ValueMap to convert constants from one module to another. -static Value *RemapOperand(const Value *In, - std::map<const Value*, Value*> &ValueMap) { - std::map<const Value*,Value*>::const_iterator I = ValueMap.find(In); - if (I != ValueMap.end()) - return I->second; - - // Check to see if it's a constant that we are interested in transforming. - Value *Result = 0; - if (const Constant *CPV = dyn_cast<Constant>(In)) { - if ((!isa<DerivedType>(CPV->getType()) && !isa<ConstantExpr>(CPV)) || - isa<ConstantInt>(CPV) || isa<ConstantAggregateZero>(CPV)) - return const_cast<Constant*>(CPV); // Simple constants stay identical. 
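The hand-rolled RemapOperand being removed below is superseded by the generic MapValue from llvm/Transforms/Utils/ValueMapper.h, which this patch adopts throughout. The replacement call sites reduce to the shape shown here (taken from the hunks later in this file):

// Map a constant from Src into Dest; ValueMap holds the Src-to-Dest
// correspondences built while linking globals and function prototypes.
Constant *SInit =
    cast<Constant>(MapValue(SGV->getInitializer(), ValueMap, true));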
-
- if (const ConstantArray *CPA = dyn_cast<ConstantArray>(CPV)) {
- std::vector<Constant*> Operands(CPA->getNumOperands());
- for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap));
- Result = ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
- } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(CPV)) {
- std::vector<Constant*> Operands(CPS->getNumOperands());
- for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap));
- Result = ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
- } else if (isa<ConstantPointerNull>(CPV) || isa<UndefValue>(CPV)) {
- Result = const_cast<Constant*>(CPV);
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CPV)) {
- std::vector<Constant*> Operands(CP->getNumOperands());
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap));
- Result = ConstantVector::get(Operands);
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
- std::vector<Constant*> Ops;
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i), ValueMap)));
- Result = CE->getWithOperands(Ops);
- } else if (const BlockAddress *CE = dyn_cast<BlockAddress>(CPV)) {
- Result = BlockAddress::get(
- cast<Function>(RemapOperand(CE->getFunction(), ValueMap)),
- CE->getBasicBlock());
- } else {
- assert(!isa<GlobalValue>(CPV) && "Unmapped global?");
- llvm_unreachable("Unknown type of derived type constant value!");
- }
- } else if (const MDNode *MD = dyn_cast<MDNode>(In)) {
- if (MD->isFunctionLocal()) {
- SmallVector<Value*, 4> Elts;
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
- if (MD->getOperand(i))
- Elts.push_back(RemapOperand(MD->getOperand(i), ValueMap));
- else
- Elts.push_back(NULL);
- }
- Result = MDNode::get(In->getContext(), Elts.data(), MD->getNumOperands());
- } else {
- Result = const_cast<Value*>(In);
- }
- } else if (isa<MDString>(In) || isa<InlineAsm>(In) || isa<Instruction>(In)) {
- Result = const_cast<Value*>(In);
- }
-
- // Cache the mapping in our local map structure
- if (Result) {
- ValueMap[In] = Result;
- return Result;
- }
-
-#ifndef NDEBUG
- dbgs() << "LinkModules ValueMap: \n";
- PrintMap(ValueMap);
-
- dbgs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
- llvm_unreachable("Couldn't remap value!");
-#endif
- return 0;
-}
-
 /// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict
 /// in the symbol table. This is good for all clients except for us. Go
 /// through the trouble to force this back.
@@ -541,25 +442,24 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
 }
 
 // Insert all of the named mdnodes in Src into the Dest module.
-static void LinkNamedMDNodes(Module *Dest, Module *Src) {
+static void LinkNamedMDNodes(Module *Dest, Module *Src,
+ ValueToValueMapTy &ValueMap) {
 for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
 E = Src->named_metadata_end(); I != E; ++I) {
 const NamedMDNode *SrcNMD = I;
- NamedMDNode *DestNMD = Dest->getNamedMetadata(SrcNMD->getName());
- if (!DestNMD)
- NamedMDNode::Create(SrcNMD, Dest);
- else {
- // Add Src elements into Dest node.
- for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
- DestNMD->addOperand(SrcNMD->getOperand(i));
- }
+ NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
+ ValueMap,
+ true)));
 }
}

// LinkGlobals - Loop through the global variables in the src module and merge
// them into the dest module.
static bool LinkGlobals(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
 std::multimap<std::string, GlobalVariable *> &AppendingVars,
 std::string *Err) {
 ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
@@ -735,6 +635,12 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
 else if (SL == GlobalValue::LinkerPrivateLinkage &&
 DL == GlobalValue::LinkerPrivateLinkage)
 return GlobalValue::LinkerPrivateLinkage;
+ else if (SL == GlobalValue::LinkerPrivateWeakLinkage &&
+ DL == GlobalValue::LinkerPrivateWeakLinkage)
+ return GlobalValue::LinkerPrivateWeakLinkage;
+ else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage &&
+ DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
 else {
 assert (SL == GlobalValue::PrivateLinkage &&
 DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
@@ -746,7 +652,7 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
// dest module. We're assuming that all functions/global variables were already
// linked in.
static bool LinkAlias(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
 std::string *Err) {
 // Loop over all aliases in the src module
 for (Module::const_alias_iterator I = Src->alias_begin(),
@@ -757,7 +663,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
 
 // Globals were already linked, thus we can just query ValueMap for variant
 // of SAliasee in Dest.
- std::map<const Value*,Value*>::const_iterator VMI = ValueMap.find(SAliasee);
+ ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee);
 assert(VMI != ValueMap.end() && "Aliasee not linked");
 GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
 GlobalValue* DGV = NULL;
@@ -888,7 +794,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
 ForceRenaming(NewGA, SGA->getName());
 
 // Remember this mapping so uses in the source module get remapped
- // later by RemapOperand.
+ // later by MapValue.
 ValueMap[SGA] = NewGA;
 }
 
@@ -899,7 +805,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
// LinkGlobalInits - Update the initializers in the Dest module now that all
// globals that may be referenced are in Dest.
static bool LinkGlobalInits(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
 std::string *Err) {
 // Loop over all of the globals in the src module, mapping them over as we go
 for (Module::const_global_iterator I = Src->global_begin(),
@@ -909,7 +815,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
 if (SGV->hasInitializer()) { // Only process initialized GV's
 // Figure out what the initializer looks like in the dest module...
 Constant *SInit =
- cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap));
+ cast<Constant>(MapValue(SGV->getInitializer(), ValueMap, true));
 // Grab destination global variable or alias.
GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts()); @@ -954,7 +860,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src, // to the Dest function... // static bool LinkFunctionProtos(Module *Dest, const Module *Src, - std::map<const Value*, Value*> &ValueMap, + ValueToValueMapTy &ValueMap, std::string *Err) { ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable(); @@ -1039,7 +945,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src, ForceRenaming(NewDF, SF->getName()); // Remember this mapping so uses in the source module get remapped - // later by RemapOperand. + // later by MapValue. ValueMap[SF] = NewDF; continue; } @@ -1069,7 +975,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src, // fix up references to values. At this point we know that Dest is an external // function, and that Src is not. static bool LinkFunctionBody(Function *Dest, Function *Src, - std::map<const Value*, Value*> &ValueMap, + ValueToValueMapTy &ValueMap, std::string *Err) { assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration()); @@ -1091,12 +997,30 @@ static bool LinkFunctionBody(Function *Dest, Function *Src, // the Source function as operands. Loop through all of the operands of the // functions and patch them up to point to the local versions... // + // This is the same as RemapInstruction, except that it avoids remapping + // instruction and basic block operands. + // for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + // Remap operands. for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI)) - *OI = RemapOperand(*OI, ValueMap); + *OI = MapValue(*OI, ValueMap, true); + + // Remap attached metadata. + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + I->getAllMetadata(MDs); + for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator + MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { + Value *Old = MI->second; + if (!isa<Instruction>(Old) && !isa<BasicBlock>(Old)) { + Value *New = MapValue(Old, ValueMap, true); + if (New != Old) + I->setMetadata(MI->first, cast<MDNode>(New)); + } + } + } // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); @@ -1111,7 +1035,7 @@ static bool LinkFunctionBody(Function *Dest, Function *Src, // source module into the DestModule. This consists basically of copying the // function over and fixing up references to values. static bool LinkFunctionBodies(Module *Dest, Module *Src, - std::map<const Value*, Value*> &ValueMap, + ValueToValueMapTy &ValueMap, std::string *Err) { // Loop over all of the functions in the src module, mapping them over as we @@ -1319,8 +1243,10 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) { return true; // ValueMap - Mapping of values from what they used to be in Src, to what they - // are now in Dest. - std::map<const Value*, Value*> ValueMap; + // are now in Dest. ValueToValueMapTy is a ValueMap, which involves some + // overhead due to the use of Value handles which the Linker doesn't actually + // need, but this allows us to reuse the ValueMapper code. + ValueToValueMapTy ValueMap; // AppendingVars - Keep track of global variables in the destination module // with appending linkage. 
After the module is linked together, they are
@@ -1334,9 +1260,6 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
 AppendingVars.insert(std::make_pair(I->getName(), I));
 }
 
- // Insert all of the named mdnodes in Src into the Dest module.
- LinkNamedMDNodes(Dest, Src);
-
 // Insert all of the globals in src into the Dest module... without linking
 // initializers (which could refer to functions not yet mapped over).
 if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
@@ -1370,6 +1293,11 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
 // Resolve all uses of aliases with aliasees
 if (ResolveAliases(Dest)) return true;
 
+ // Remap all of the named mdnodes in Src into the Dest module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ LinkNamedMDNodes(Dest, Src, ValueMap);
+
 // If the source library's module id is in the dependent library list of the
 // destination library, remove it since that module is now linked in.
 sys::Path modId;
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index fc4f3c69482ac..60a3a3e3e3128 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMMC
+ ELFObjectWriter.cpp
 MCAsmInfo.cpp
 MCAsmInfoCOFF.cpp
 MCAsmInfoDarwin.cpp
@@ -7,10 +8,12 @@ add_llvm_library(LLVMMC
 MCCodeEmitter.cpp
 MCContext.cpp
 MCDisassembler.cpp
+ MCELFStreamer.cpp
 MCExpr.cpp
 MCInst.cpp
 MCInstPrinter.cpp
 MCLabel.cpp
+ MCDwarf.cpp
 MCLoggingStreamer.cpp
 MCMachOStreamer.cpp
 MCNullStreamer.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp new file mode 100644 index 0000000000000..cf35b45715e1e --- /dev/null +++ b/lib/MC/ELFObjectWriter.cpp
@@ -0,0 +1,973 @@
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ELF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/ELFObjectWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <vector>
+using namespace llvm;
+
+namespace {
+
+ class ELFObjectWriterImpl {
+ static bool isFixupKindX86PCRel(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return false;
+ case X86::reloc_pcrel_1byte:
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return true;
+ }
+ }
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+
+ /// ELFSymbolData - Helper struct for containing some precomputed information
+ /// on symbols.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// @name Relocation Data
+ /// @{
+
+ struct ELFRelocationEntry {
+ // Make these big enough for both 32-bit and 64-bit
+ uint64_t r_offset;
+ uint64_t r_info;
+ uint64_t r_addend;
+
+ // Sort in descending r_offset order; the writer emits the entries in
+ // reverse, i.e. in ascending r_offset order.
+ bool operator<(const ELFRelocationEntry &RE) const {
+ return RE.r_offset < r_offset;
+ }
+ };
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ ELFObjectWriter *Writer;
+
+ raw_ostream &OS;
+
+ // This holds the current offset into the object file.
+ size_t FileOff;
+
+ unsigned Is64Bit : 1;
+
+ bool HasRelocationAddend;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+
+ unsigned ShstrtabIndex;
+
+ public:
+ ELFObjectWriterImpl(ELFObjectWriter *_Writer, bool _Is64Bit,
+ bool _HasRelAddend)
+ : Writer(_Writer), OS(Writer->getStream()), FileOff(0),
+ Is64Bit(_Is64Bit), HasRelocationAddend(_HasRelAddend) {
+ }
+
+ void Write8(uint8_t Value) { Writer->Write8(Value); }
+ void Write16(uint16_t Value) { Writer->Write16(Value); }
+ void Write32(uint32_t Value) { Writer->Write32(Value); }
+ //void Write64(uint64_t Value) { Writer->Write64(Value); }
+ void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
+ //void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+ // Writer->WriteBytes(Str, ZeroFillSize);
+ //}
+
+ void WriteWord(uint64_t W) {
+ if (Is64Bit)
+ Writer->Write64(W);
+ else
+ Writer->Write32(W);
+ }
+
+ void String8(char *buf, uint8_t Value) {
+ buf[0] = Value;
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String16(char *buf, uint16_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ }
+
+ void String32(char *buf, uint32_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ }
+
+ void String64(char *buf, uint64_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ }
+
+ void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+ void WriteSymbolEntry(MCDataFragment *F, uint64_t name, uint8_t info,
+ uint64_t value,
uint64_t size,
+ uint8_t other, uint16_t shndx);
+
+ void WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ void WriteSymbolTable(MCDataFragment *F, const MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ /// ComputeSymbolTable - Compute the symbol table data, filling in the
+ /// string table and the local, external, and undefined symbol lists.
+ void ComputeSymbolTable(MCAssembler &Asm);
+
+ void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ WriteRelocation(Asm, Layout, *it);
+ }
+ }
+
+ void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout);
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm) {
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm);
+ }
+
+ void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ void WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F,
+ const MCSectionData *SD);
+
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ };
+
+}
+
+// Emit the ELF header.
+void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
+ // ELF Header
+ // ----------
+ //
+ // Note
+ // ----
+ // The WriteWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter.
+
+ Write8(0x7f); // e_ident[EI_MAG0]
+ Write8('E'); // e_ident[EI_MAG1]
+ Write8('L'); // e_ident[EI_MAG2]
+ Write8('F'); // e_ident[EI_MAG3]
+
+ Write8(Is64Bit ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+
+ // e_ident[EI_DATA]
+ Write8(Writer->isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+
+ Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ Write8(ELF::ELFOSABI_LINUX); // e_ident[EI_OSABI]
+ Write8(0); // e_ident[EI_ABIVERSION]
+
+ WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
+
+ Write16(ELF::ET_REL); // e_type
+
+ // FIXME: Make this configurable
+ Write16(Is64Bit ? ELF::EM_X86_64 : ELF::EM_386); // e_machine = target
+
+ Write32(ELF::EV_CURRENT); // e_version
+ WriteWord(0); // e_entry, no entry point in .o file
+ WriteWord(0); // e_phoff, no program header for .o
+ WriteWord(SectionDataSize + (Is64Bit ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+
+ // FIXME: Make this configurable.
+ Write32(0); // e_flags = whatever the target wants
+
+ // e_ehsize = ELF header size
+ Write16(Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+
+ Write16(0); // e_phentsize = prog header entry size
+ Write16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ Write16(Is64Bit ?
sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr)); + + // e_shnum = # of section header ents + Write16(NumberOfSections); + + // e_shstrndx = Section # of '.shstrtab' + Write16(ShstrtabIndex); +} + +void ELFObjectWriterImpl::WriteSymbolEntry(MCDataFragment *F, uint64_t name, + uint8_t info, uint64_t value, + uint64_t size, uint8_t other, + uint16_t shndx) { + if (Is64Bit) { + char buf[8]; + + String32(buf, name); + F->getContents() += StringRef(buf, 4); // st_name + + String8(buf, info); + F->getContents() += StringRef(buf, 1); // st_info + + String8(buf, other); + F->getContents() += StringRef(buf, 1); // st_other + + String16(buf, shndx); + F->getContents() += StringRef(buf, 2); // st_shndx + + String64(buf, value); + F->getContents() += StringRef(buf, 8); // st_value + + String64(buf, size); + F->getContents() += StringRef(buf, 8); // st_size + } else { + char buf[4]; + + String32(buf, name); + F->getContents() += StringRef(buf, 4); // st_name + + String32(buf, value); + F->getContents() += StringRef(buf, 4); // st_value + + String32(buf, size); + F->getContents() += StringRef(buf, 4); // st_size + + String8(buf, info); + F->getContents() += StringRef(buf, 1); // st_info + + String8(buf, other); + F->getContents() += StringRef(buf, 1); // st_other + + String16(buf, shndx); + F->getContents() += StringRef(buf, 2); // st_shndx + } +} + +void ELFObjectWriterImpl::WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD, + const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + uint8_t Info = (Data.getFlags() & 0xff); + uint8_t Other = ((Data.getFlags() & 0xf00) >> ELF_STV_Shift); + uint64_t Value = 0; + uint64_t Size = 0; + const MCExpr *ESize; + + if (Data.isCommon() && Data.isExternal()) + Value = Data.getCommonAlignment(); + + if (!Data.isCommon()) + if (MCFragment *FF = Data.getFragment()) + Value = Layout.getSymbolAddress(&Data) - + Layout.getSectionAddress(FF->getParent()); + + ESize = Data.getSize(); + if (Data.getSize()) { + MCValue Res; + if (ESize->getKind() == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(ESize); + + if (BE->EvaluateAsRelocatable(Res, &Layout)) { + MCSymbolData &A = + Layout.getAssembler().getSymbolData(Res.getSymA()->getSymbol()); + MCSymbolData &B = + Layout.getAssembler().getSymbolData(Res.getSymB()->getSymbol()); + + Size = Layout.getSymbolAddress(&A) - Layout.getSymbolAddress(&B); + } + } else if (ESize->getKind() == MCExpr::Constant) { + Size = static_cast<const MCConstantExpr *>(ESize)->getValue(); + } else { + assert(0 && "Unsupported size expression"); + } + } + + // Write out the symbol table entry + WriteSymbolEntry(F, MSD.StringIndex, Info, Value, + Size, Other, MSD.SectionIndex); +} + +void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F, + const MCAssembler &Asm, + const MCAsmLayout &Layout) { + // The string table must be emitted first because we need the index + // into the string table for all the symbol names. + assert(StringTable.size() && "Missing string table"); + + // FIXME: Make sure the start of the symbol table is aligned. + + // The first entry is the undefined symbol entry. + unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; + F->getContents().append(EntrySize, '\x00'); + + // Write the symbol table entries. + LastLocalSymbolIndex = LocalSymbolData.size() + 1; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) { + ELFSymbolData &MSD = LocalSymbolData[i]; + WriteSymbol(F, MSD, Layout); + } + + // Write out a symbol table entry for each section. 
+ // leaving out the just-added .symtab, which is at
+ // the very end.
+ unsigned Index = 1;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ // Leave out relocation sections so the section symbol
+ // indexes aren't thrown off.
+ if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL)
+ continue;
+ if (Index == Asm.size())
+ continue;
+ WriteSymbolEntry(F, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, Index);
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = ExternalSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ assert((Data.getFlags() & ELF_STB_Global) &&
+ "External symbol requires STB_GLOBAL flag");
+ WriteSymbol(F, MSD, Layout);
+ if (Data.getFlags() & ELF_STB_Local)
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = UndefinedSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ Data.setFlags(Data.getFlags() | ELF_STB_Global);
+ WriteSymbol(F, MSD, Layout);
+ if (Data.getFlags() & ELF_STB_Local)
+ LastLocalSymbolIndex++;
+ }
+}
+
+// FIXME: this is currently X86/X86_64 only
+void ELFObjectWriterImpl::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ int64_t Addend = 0;
+ unsigned Index = 0;
+ int64_t Value = Target.getConstant();
+
+ if (!Target.isAbsolute()) {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
+ MCFragment *F = SD.getFragment();
+
+ if (Base) {
+ if (F && (!Symbol->isInSection() || SD.isCommon()) && !SD.isExternal()) {
+ Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
+ Value += Layout.getSymbolAddress(&SD);
+ } else
+ Index = getSymbolIndexInSymbolTable(Asm, Symbol);
+ if (Base != &SD)
+ Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
+ Addend = Value;
+ // Compensate for the addend on i386.
+ if (Is64Bit)
+ Value = 0;
+ } else {
+ if (F) {
+ // Index in .symtab of the section against which this symbol
+ // is being relocated, plus 2 (empty section + absolute symbols).
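The symbol index and relocation type computed in this function end up packed into ELFRelocationEntry::r_info by the setSymbolAndType calls in the next hunk. The packing follows the standard ELF definitions (the spec's ELF64_R_INFO / ELF32_R_INFO macros), sketched here for reference:

#include <stdint.h>

// What Elf64_Rela::setSymbolAndType and Elf32_Rela::setSymbolAndType compute.
uint64_t PackRInfo64(uint32_t Sym, uint32_t Type) {
  return (uint64_t(Sym) << 32) | Type;   // high 32 bits: symbol index
}
uint32_t PackRInfo32(uint32_t Sym, uint8_t Type) {
  return (Sym << 8) | Type;              // low 8 bits: relocation type
}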
+ Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
+
+ MCSectionData *FSD = F->getParent();
+ // Offset of the symbol in the section
+ Addend = Layout.getSymbolAddress(&SD) - Layout.getSectionAddress(FSD);
+ } else {
+ FixedValue = Value;
+ return;
+ }
+ }
+ }
+
+ FixedValue = Value;
+
+ // Determine the type of the relocation.
+ bool IsPCRel = isFixupKindX86PCRel(Fixup.getKind());
+ unsigned Type;
+ if (Is64Bit) {
+ if (IsPCRel) {
+ Type = ELF::R_X86_64_PC32;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_pcrel_4byte:
+ case FK_Data_4:
+ // Check that the constant fits in a signed 32-bit value.
+ if (isInt<32>(Target.getConstant()))
+ Type = ELF::R_X86_64_32S;
+ else
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ Type = ELF::R_386_PC32;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case X86::reloc_pcrel_4byte:
+ case FK_Data_4: Type = ELF::R_386_32; break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ ELFRelocationEntry ERE;
+
+ if (Is64Bit) {
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(Index, Type);
+ ERE.r_info = ERE64.r_info;
+ } else {
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(Index, Type);
+ ERE.r_info = ERE32.r_info;
+ }
+
+ ERE.r_offset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ if (HasRelocationAddend)
+ ERE.r_addend = Addend;
+ else
+ ERE.r_addend = 0; // Silence compiler warning.
+
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
+
+uint64_t
+ELFObjectWriterImpl::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+
+ // Local symbol.
+ if (!SD.isExternal() && !S->isUndefined())
+ return SD.getIndex() + /* empty symbol */ 1;
+
+ // External or undefined symbol.
+ return SD.getIndex() + Asm.size() + /* empty symbol */ 1;
+}
+
+void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add non-local symbols.
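A worked example of the string-table interning above, with hypothetical names: after adding "foo" and then "bar", the table is laid out as

  offset 0: '\0'      (index 0 is reserved for the empty name)
  offset 1: "foo\0"   (StringIndexMap["foo"] == 1)
  offset 5: "bar\0"   (StringIndexMap["bar"] == 5)

so each symbol's st_name is simply the byte offset of its NUL-terminated name, and repeated names share a single entry.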
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = ELF::SHN_UNDEF;
+ // XXX: for some reason we don't Emit* this
+ it->setFlags(it->getFlags() | ELF_STB_Global);
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ ExternalSymbolData.push_back(MSD);
+ } else if (it->isCommon()) {
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Symbols are required to be in lexicographic order.
+ array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
+ array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices. Local symbols must come before all other
+ // symbols with non-local bindings.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+}
+
+void ELFObjectWriterImpl::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (!Relocations[&SD].empty()) {
+ MCContext &Ctx = Asm.getContext();
+ const MCSection *RelaSection;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const StringRef SectionName = Section.getSectionName();
+ std::string RelaSectionName = HasRelocationAddend ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
+
+ unsigned EntrySize;
+ if (HasRelocationAddend)
+ EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+
+ RelaSection = Ctx.getELFSection(RelaSectionName, HasRelocationAddend ?
+ ELF::SHT_RELA : ELF::SHT_REL, 0,
+ SectionKind::getReadOnly(),
+ false, EntrySize);
+
+ MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
+ RelaSD.setAlignment(1);
+
+ MCDataFragment *F = new MCDataFragment(&RelaSD);
+
+ WriteRelocationsFragment(Asm, F, &SD);
+
+ Asm.AddSectionToTheEnd(RelaSD, Layout);
+ }
+}
+
+void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
+ Write32(Name); // sh_name: index into string table
+ Write32(Type); // sh_type
+ WriteWord(Flags); // sh_flags
+ WriteWord(Address); // sh_addr
+ WriteWord(Offset); // sh_offset
+ WriteWord(Size); // sh_size
+ Write32(Link); // sh_link
+ Write32(Info); // sh_info
+ WriteWord(Alignment); // sh_addralign
+ WriteWord(EntrySize); // sh_entsize
+}
+
+void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
+ // Sort by r_offset, just as GNU as does.
+ array_pod_sort(Relocs.begin(), Relocs.end());
+
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ ELFRelocationEntry entry = Relocs[e - i - 1];
+
+ unsigned WordSize = Is64Bit ? 8 : 4;
+ F->getContents() += StringRef((const char *)&entry.r_offset, WordSize);
+ F->getContents() += StringRef((const char *)&entry.r_info, WordSize);
+
+ if (HasRelocationAddend)
+ F->getContents() += StringRef((const char *)&entry.r_addend, WordSize);
+ }
+}
+
+void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout) {
+ MCContext &Ctx = Asm.getContext();
+ MCDataFragment *F;
+
+ WriteRelocations(Asm, Layout);
+
+ const MCSection *SymtabSection;
+ unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+
+ SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ false, EntrySize);
+
+ MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+
+ SymtabSD.setAlignment(Is64Bit ? 8 : 4);
+
+ F = new MCDataFragment(&SymtabSD);
+
+ // Symbol table
+ WriteSymbolTable(F, Asm, Layout);
+ Asm.AddSectionToTheEnd(SymtabSD, Layout);
+
+ const MCSection *StrtabSection;
+ StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly(), false);
+
+ MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
+ StrtabSD.setAlignment(1);
+
+ // FIXME: This isn't right. If the sections get rearranged this will
+ // be wrong. We need a proper lookup.
+ StringTableIndex = Asm.size();
+
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
+ Asm.AddSectionToTheEnd(StrtabSD, Layout);
+
+ const MCSection *ShstrtabSection;
+ ShstrtabSection = Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly(), false);
+
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+
+ F = new MCDataFragment(&ShstrtabSD);
+
+ // FIXME: This isn't right. If the sections get rearranged this will
+ // be wrong. We need a proper lookup.
+ ShstrtabIndex = Asm.size();
+
+ // Section header string table.
+ //
+ // The first entry of a string table holds a null character so skip
+ // section 0.
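The parameter order of WriteSecHdrEntry above mirrors the on-disk section header layout. For reference, a sketch of the 64-bit form as defined by the ELF specification (the struct name here is illustrative, not an LLVM type):

struct Elf64_Shdr_Layout {
  uint32_t sh_name;       // byte offset of the name in .shstrtab
  uint32_t sh_type;       // SHT_*
  uint64_t sh_flags;
  uint64_t sh_addr;
  uint64_t sh_offset;     // file offset of the section contents
  uint64_t sh_size;
  uint32_t sh_link;       // interpretation depends on sh_type
  uint32_t sh_info;       // interpretation depends on sh_type
  uint64_t sh_addralign;
  uint64_t sh_entsize;    // entry size for table-like sections
};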
+ uint64_t Index = 1; + F->getContents() += '\x00'; + + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(it->getSection()); + + // Remember the index into the string table so we can write it + // into the sh_name field of the section header table. + SectionStringTableIndex[&it->getSection()] = Index; + + Index += Section.getSectionName().size() + 1; + F->getContents() += Section.getSectionName(); + F->getContents() += '\x00'; + } + + Asm.AddSectionToTheEnd(ShstrtabSD, Layout); +} + +void ELFObjectWriterImpl::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + CreateMetadataSections(const_cast<MCAssembler&>(Asm), + const_cast<MCAsmLayout&>(Layout)); + + // Add 1 for the null section. + unsigned NumSections = Asm.size() + 1; + + uint64_t SectionDataSize = 0; + + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + + // Get the size of the section in the output file (including padding). + uint64_t Size = Layout.getSectionFileSize(&SD); + SectionDataSize += Size; + } + + // Write out the ELF header ... + WriteHeader(SectionDataSize, NumSections); + FileOff = Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr); + + // ... then all of the sections ... + DenseMap<const MCSection*, uint64_t> SectionOffsetMap; + + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + + unsigned Index = 1; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Remember the offset into the file for this section. + SectionOffsetMap[&it->getSection()] = FileOff; + + SectionIndexMap[&it->getSection()] = Index++; + + const MCSectionData &SD = *it; + FileOff += Layout.getSectionFileSize(&SD); + + Asm.WriteSectionData(it, Layout, Writer); + } + + // ... and then the section header table. + // Should we align the section header table? + // + // Null section first. + WriteSecHdrEntry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(SD.getSection()); + + uint64_t sh_link = 0; + uint64_t sh_info = 0; + + switch(Section.getType()) { + case ELF::SHT_DYNAMIC: + sh_link = SectionStringTableIndex[&it->getSection()]; + sh_info = 0; + break; + + case ELF::SHT_REL: + case ELF::SHT_RELA: { + const MCSection *SymtabSection; + const MCSection *InfoSection; + + SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, 0, + SectionKind::getReadOnly(), + false); + sh_link = SectionIndexMap[SymtabSection]; + + // Remove ".rel" and ".rela" prefixes. + unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5; + StringRef SectionName = Section.getSectionName().substr(SecNameLen); + + InfoSection = Asm.getContext().getELFSection(SectionName, + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly(), + false); + sh_info = SectionIndexMap[InfoSection]; + break; + } + + case ELF::SHT_SYMTAB: + case ELF::SHT_DYNSYM: + sh_link = StringTableIndex; + sh_info = LastLocalSymbolIndex; + break; + + case ELF::SHT_PROGBITS: + case ELF::SHT_STRTAB: + case ELF::SHT_NOBITS: + case ELF::SHT_NULL: + // Nothing to do. 
+ break; + + case ELF::SHT_HASH: + case ELF::SHT_GROUP: + case ELF::SHT_SYMTAB_SHNDX: + default: + assert(0 && "FIXME: sh_type value not supported!"); + break; + } + + WriteSecHdrEntry(SectionStringTableIndex[&it->getSection()], + Section.getType(), Section.getFlags(), + Layout.getSectionAddress(&SD), + SectionOffsetMap.lookup(&SD.getSection()), + Layout.getSectionSize(&SD), sh_link, + sh_info, SD.getAlignment(), + Section.getEntrySize()); + } +} + +ELFObjectWriter::ELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + bool IsLittleEndian, + bool HasRelocationAddend) + : MCObjectWriter(OS, IsLittleEndian) +{ + Impl = new ELFObjectWriterImpl(this, Is64Bit, HasRelocationAddend); +} + +ELFObjectWriter::~ELFObjectWriter() { + delete (ELFObjectWriterImpl*) Impl; +} + +void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + ((ELFObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); +} + +void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + ((ELFObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); +} + +void ELFObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { + ((ELFObjectWriterImpl*) Impl)->WriteObject(Asm, Layout); +} diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index a275be2c53c5f..670b2e9b292a5 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -68,7 +68,9 @@ MCAsmInfo::MCAsmInfo() { ExceptionsType = ExceptionHandling::None; DwarfRequiresFrameSection = true; DwarfUsesInlineInfoSection = false; + DwarfUsesAbsoluteLabelForStmtList = true; DwarfSectionOffsetDirective = 0; + DwarfUsesLabelOffsetForRanges = true; HasMicrosoftFastStdCallMangling = false; AsmTransCBE = 0; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 0bd3b2d001e8c..e0e261a63c707 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -44,5 +44,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; + + DwarfUsesAbsoluteLabelForStmtList = false; + DwarfUsesLabelOffsetForRanges = false; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index e272b60c44759..1cc8fb0b54867 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -31,7 +31,7 @@ class MCAsmStreamer : public MCStreamer { formatted_raw_ostream &OS; const MCAsmInfo &MAI; OwningPtr<MCInstPrinter> InstPrinter; - MCCodeEmitter *Emitter; + OwningPtr<MCCodeEmitter> Emitter; SmallString<128> CommentToEmit; raw_svector_ostream CommentStream; @@ -217,6 +217,7 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { void MCAsmStreamer::SwitchSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); if (Section != CurSection) { + PrevSection = CurSection; CurSection = Section; Section->PrintSwitchToSection(MAI, OS); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 7d8455492780c..f0e1d7fbc21c5 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -178,8 +178,12 @@ uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { MCFragment::MCFragment() : Kind(FragmentType(~0)) { } +MCFragment::~MCFragment() { +} + MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) - : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0)) + : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)), + 
EffectiveSize(~UINT64_C(0))
 {
   if (Parent)
     Parent->getFragmentList().push_back(this);
@@ -207,7 +211,8 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
                            uint64_t _Offset, MCAssembler *A)
   : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
     IsExternal(false), IsPrivateExtern(false),
-    CommonSize(0), CommonAlign(0), Flags(0), Index(0)
+    CommonSize(0), SymbolSize(0), CommonAlign(0),
+    Flags(0), Index(0)
 {
   if (A)
     A->getSymbolList().push_back(this);
@@ -623,8 +628,23 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
       switch (it->getKind()) {
       default:
         assert(0 && "Invalid fragment in virtual section!");
+      case MCFragment::FT_Data: {
+        // Check that we aren't trying to write non-zero contents (or fixups)
+        // into a virtual section. This is to support clients which use standard
+        // directives to fill the contents of virtual sections.
+        MCDataFragment &DF = cast<MCDataFragment>(*it);
+        assert(DF.fixup_begin() == DF.fixup_end() &&
+               "Cannot have fixups in virtual section!");
+        for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+          assert(DF.getContents()[i] == 0 &&
+                 "Invalid data value for virtual section!");
+        break;
+      }
       case MCFragment::FT_Align:
-        assert(!cast<MCAlignFragment>(it)->getValueSize() &&
+        // Check that we aren't trying to write a non-zero value into a virtual
+        // section.
+        assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+                !cast<MCAlignFragment>(it)->getValue()) &&
                "Invalid align in virtual section!");
         break;
       case MCFragment::FT_Fill:
@@ -647,7 +667,41 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
   assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
 }
 
-void MCAssembler::Finish() {
+void MCAssembler::AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout) {
+  // Create dummy fragments and assign section ordinals.
+  unsigned SectionIndex = 0;
+  for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it)
+    SectionIndex++;
+
+  SD.setOrdinal(SectionIndex);
+
+  // Assign layout order indices to sections and fragments.
+  unsigned FragmentIndex = 0;
+  unsigned i = 0;
+  for (unsigned e = Layout.getSectionOrder().size(); i != e; ++i) {
+    MCSectionData *SD = Layout.getSectionOrder()[i];
+
+    for (MCSectionData::iterator it2 = SD->begin(),
+           ie2 = SD->end(); it2 != ie2; ++it2)
+      FragmentIndex++;
+  }
+
+  SD.setLayoutOrder(i);
+  for (MCSectionData::iterator it2 = SD.begin(),
+         ie2 = SD.end(); it2 != ie2; ++it2) {
+    it2->setLayoutOrder(FragmentIndex++);
+  }
+  Layout.getSectionOrder().push_back(&SD);
+
+  Layout.LayoutSection(&SD);
+
+  // Layout until everything fits.
+  while (LayoutOnce(Layout))
+    continue;
+
+}
+
+void MCAssembler::Finish(MCObjectWriter *Writer) {
   DEBUG_WITH_TYPE("mc-dump", {
       llvm::errs() << "assembler backend - pre-layout\n--\n";
       dump(); });
@@ -717,9 +771,15 @@ void MCAssembler::Finish() {
       dump(); });
 
   uint64_t StartOffset = OS.tell();
-  llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS));
-  if (!Writer)
-    report_fatal_error("unable to create object writer!");
+
+  llvm::OwningPtr<MCObjectWriter> OwnWriter(0);
+  if (Writer == 0) {
+    // No custom writer supplied: create the default one, lifetime-managed
+    // by the OwningPtr.
+    OwnWriter.reset(getBackend().createObjectWriter(OS));
+    Writer = OwnWriter.get();
+    if (!Writer)
+      report_fatal_error("unable to create object writer!");
+  }
 
   // Allow the object writer a chance to perform post-layout binding (for
   // example, to set the index fields in the symbol data).
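The Finish() change above is the hook the new ELF support relies on: a caller can now hand MCAssembler a specific MCObjectWriter instead of the backend default. A minimal sketch of how a client might combine it with the ELFObjectWriter constructor introduced in this patch; the assembler setup itself is assumed to exist elsewhere:

    // Sketch only: Asm is assumed to be a fully configured MCAssembler and
    // OS the stream that should receive the object file.
    void writeELFObject(llvm::MCAssembler &Asm, llvm::raw_ostream &OS) {
      // 64-bit, little-endian, RELA-style relocations (e.g. x86_64).
      llvm::ELFObjectWriter Writer(OS, /*Is64Bit=*/true,
                                   /*IsLittleEndian=*/true,
                                   /*HasRelocationAddend=*/true);
      // A non-null writer bypasses the getBackend().createObjectWriter(OS)
      // path; passing 0 preserves the old behavior.
      Asm.Finish(&Writer);
    }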
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 11370642530a1..e5586a0d7c311 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -14,6 +14,7 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCLabel.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
@@ -23,7 +24,8 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
 typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
 
-MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
+MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
+                                             CurrentDwarfLoc(0,0,0,0,0) {
   MachOUniquingMap = 0;
   ELFUniquingMap = 0;
   COFFUniquingMap = 0;
@@ -31,6 +33,8 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
   SecureLogFile = getenv("AS_SECURE_LOG_FILE");
   SecureLog = 0;
   SecureLogUsed = false;
+
+  DwarfLocSeen = false;
 }
 
 MCContext::~MCContext() {
@@ -147,7 +151,7 @@ getMachOSection(StringRef Segment, StringRef Section,
 
 const MCSection *MCContext::
 getELFSection(StringRef Section, unsigned Type, unsigned Flags,
-              SectionKind Kind, bool IsExplicit) {
+              SectionKind Kind, bool IsExplicit, unsigned EntrySize) {
   if (ELFUniquingMap == 0)
     ELFUniquingMap = new ELFUniqueMapTy();
   ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
@@ -157,7 +161,7 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
   if (Entry.getValue()) return Entry.getValue();
 
   MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
-                                                  Kind, IsExplicit);
+                                                  Kind, IsExplicit, EntrySize);
   Entry.setValue(Result);
   return Result;
 }
@@ -181,3 +185,81 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
   Entry.setValue(Result);
   return Result;
 }
+
+//===----------------------------------------------------------------------===//
+// Dwarf Management
+//===----------------------------------------------------------------------===//
+
+/// GetDwarfFile - takes a file name and number to place in the dwarf file and
+/// directory tables. If the file number has already been allocated, it is an
+/// error and zero is returned; the client reports the error. Otherwise the
+/// allocated file number is returned. The file numbers may be in any order.
+unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+  // TODO: a FileNumber of zero says to use the next available file number.
+  // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
+  // to not be less than one. This needs to be changed to be not less than
+  // zero.
+
+  // Make space for this FileNumber in the MCDwarfFiles vector if needed.
+  if (FileNumber >= MCDwarfFiles.size()) {
+    MCDwarfFiles.resize(FileNumber + 1);
+  } else {
+    MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+    if (ExistingFile)
+      // It is an error to use the same number more than once.
+      return 0;
+  }
+
+  // Get the new MCDwarfFile slot for this FileNumber.
+  MCDwarfFile *&File = MCDwarfFiles[FileNumber];
+
+  // Separate the directory part from the basename of the FileName.
+  std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+
+  // Find or make an entry in the MCDwarfDirs vector for this Directory.
+  StringRef Name;
+  unsigned DirIndex;
+  // Capture directory name.
+  if (Slash.second.empty()) {
+    Name = Slash.first;
+    DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ } else { + StringRef Directory = Slash.first; + Name = Slash.second; + for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) { + if (Directory == MCDwarfDirs[DirIndex]) + break; + } + if (DirIndex >= MCDwarfDirs.size()) { + char *Buf = static_cast<char *>(Allocate(Directory.size())); + memcpy(Buf, Directory.data(), Directory.size()); + MCDwarfDirs.push_back(StringRef(Buf, Directory.size())); + } + // The DirIndex is one based, as DirIndex of 0 is used for FileNames with + // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the + // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames are + // stored at MCDwarfFiles[FileNumber].Name . + DirIndex++; + } + + // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles + // vector. + char *Buf = static_cast<char *>(Allocate(Name.size())); + memcpy(Buf, Name.data(), Name.size()); + File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex); + + // return the allocated FileNumber. + return FileNumber; +} + +/// ValidateDwarfFileNumber - takes a dwarf file number and returns true if it +/// currently is assigned and false otherwise. +bool MCContext::ValidateDwarfFileNumber(unsigned FileNumber) { + if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size()) + return false; + + MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber]; + if (ExistingFile) + return true; + else + return false; +} diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt new file mode 100644 index 0000000000000..5fa7b70194b24 --- /dev/null +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -0,0 +1,7 @@ + +add_llvm_library(LLVMMCDisassembler + EDDisassembler.cpp + EDOperand.cpp + EDInst.cpp + EDToken.cpp + ) diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp new file mode 100644 index 0000000000000..697b3d9c05153 --- /dev/null +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -0,0 +1,402 @@ +//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's disassembler class. +// The disassembler is responsible for vending individual instructions according +// to a given architecture and disassembly syntax. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+struct TripleMap {
+  Triple::ArchType Arch;
+  const char *String;
+};
+
+static struct TripleMap triplemap[] = {
+  { Triple::x86, "i386-unknown-unknown" },
+  { Triple::x86_64, "x86_64-unknown-unknown" },
+  { Triple::arm, "arm-unknown-unknown" },
+  { Triple::thumb, "thumb-unknown-unknown" },
+  { Triple::InvalidArch, NULL, }
+};
+
+/// tripleFromArch - Returns the triple string corresponding to a given
+/// architecture, or NULL if there is an error
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const char *tripleFromArch(Triple::ArchType arch) {
+  unsigned int infoIndex;
+
+  for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+    if (arch == triplemap[infoIndex].Arch)
+      return triplemap[infoIndex].String;
+  }
+
+  return NULL;
+}
+
+/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
+/// for the desired assembly syntax, suitable for passing to
+/// Target::createMCInstPrinter()
+///
+/// @arg arch   - The target architecture
+/// @arg syntax - The assembly syntax in sd form
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+                                EDDisassembler::AssemblySyntax syntax) {
+  switch (syntax) {
+  default:
+    return -1;
+  // Mappings below from X86AsmPrinter.cpp
+  case EDDisassembler::kEDAssemblySyntaxX86ATT:
+    if (arch == Triple::x86 || arch == Triple::x86_64)
+      return 0;
+    else
+      return -1;
+  case EDDisassembler::kEDAssemblySyntaxX86Intel:
+    if (arch == Triple::x86 || arch == Triple::x86_64)
+      return 1;
+    else
+      return -1;
+  case EDDisassembler::kEDAssemblySyntaxARMUAL:
+    if (arch == Triple::arm || arch == Triple::thumb)
+      return 0;
+    else
+      return -1;
+  }
+}
+
+void EDDisassembler::initialize() {
+  if (sInitialized)
+    return;
+
+  sInitialized = true;
+
+  InitializeAllTargetInfos();
+  InitializeAllTargets();
+  InitializeAllAsmPrinters();
+  InitializeAllAsmParsers();
+  InitializeAllDisassemblers();
+}
+
+#undef BRINGUP_TARGET
+
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+                                                AssemblySyntax syntax) {
+  CPUKey key;
+  key.Arch = arch;
+  key.Syntax = syntax;
+
+  EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+  if (i != sDisassemblers.end()) {
+    return i->second;
+  } else {
+    EDDisassembler* sdd = new EDDisassembler(key);
+    if (!sdd->valid()) {
+      delete sdd;
+      return NULL;
+    }
+
+    sDisassemblers[key] = sdd;
+
+    return sdd;
+  }
+
+  return NULL;
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+                                                AssemblySyntax syntax) {
+  return
getDisassembler(Triple(str).getArch(), syntax); +} + +EDDisassembler::EDDisassembler(CPUKey &key) : + Valid(false), + HasSemantics(false), + ErrorStream(nulls()), + Key(key) { + const char *triple = tripleFromArch(key.Arch); + + if (!triple) + return; + + LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); + + if (LLVMSyntaxVariant < 0) + return; + + std::string tripleString(triple); + std::string errorString; + + Tgt = TargetRegistry::lookupTarget(tripleString, + errorString); + + if (!Tgt) + return; + + std::string featureString; + + TargetMachine.reset(Tgt->createTargetMachine(tripleString, + featureString)); + + const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); + + if (!registerInfo) + return; + + initMaps(*registerInfo); + + AsmInfo.reset(Tgt->createAsmInfo(tripleString)); + + if (!AsmInfo) + return; + + Disassembler.reset(Tgt->createMCDisassembler()); + + if (!Disassembler) + return; + + InstInfos = Disassembler->getEDInfo(); + + InstString.reset(new std::string); + InstStream.reset(new raw_string_ostream(*InstString)); + InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + + if (!InstPrinter) + return; + + GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); + SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); + SpecificAsmLexer->InstallLexer(*GenericAsmLexer); + + initMaps(*TargetMachine->getRegisterInfo()); + + Valid = true; +} + +EDDisassembler::~EDDisassembler() { + if (!valid()) + return; +} + +namespace { + /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback + /// as provided by the sd interface. See MemoryObject. + class EDMemoryObject : public llvm::MemoryObject { + private: + EDByteReaderCallback Callback; + void *Arg; + public: + EDMemoryObject(EDByteReaderCallback callback, + void *arg) : Callback(callback), Arg(arg) { } + ~EDMemoryObject() { } + uint64_t getBase() const { return 0x0; } + uint64_t getExtent() const { return (uint64_t)-1; } + int readByte(uint64_t address, uint8_t *ptr) const { + if (!Callback) + return -1; + + if (Callback(ptr, address, Arg)) + return -1; + + return 0; + } + }; +} + +EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + EDMemoryObject memoryObject(byteReader, arg); + + MCInst* inst = new MCInst; + uint64_t byteSize; + + if (!Disassembler->getInstruction(*inst, + byteSize, + memoryObject, + address, + ErrorStream)) { + delete inst; + return NULL; + } else { + const llvm::EDInstInfo *thisInstInfo; + + thisInstInfo = &InstInfos[inst->getOpcode()]; + + EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); + return sdInst; + } +} + +void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { + unsigned numRegisters = registerInfo.getNumRegs(); + unsigned registerIndex; + + for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { + const char* registerName = registerInfo.get(registerIndex).Name; + + RegVec.push_back(registerName); + RegRMap[registerName] = registerIndex; + } + + switch (Key.Arch) { + default: + break; + case Triple::x86: + case Triple::x86_64: + stackPointers.insert(registerIDWithName("SP")); + stackPointers.insert(registerIDWithName("ESP")); + stackPointers.insert(registerIDWithName("RSP")); + + programCounters.insert(registerIDWithName("IP")); + programCounters.insert(registerIDWithName("EIP")); + programCounters.insert(registerIDWithName("RIP")); + break; + case Triple::arm: + case Triple::thumb: + stackPointers.insert(registerIDWithName("SP")); + + 
programCounters.insert(registerIDWithName("PC")); + break; + } +} + +const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { + if (registerID >= RegVec.size()) + return NULL; + else + return RegVec[registerID].c_str(); +} + +unsigned EDDisassembler::registerIDWithName(const char *name) const { + regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); + if (iter == RegRMap.end()) + return 0; + else + return (*iter).second; +} + +bool EDDisassembler::registerIsStackPointer(unsigned registerID) { + return (stackPointers.find(registerID) != stackPointers.end()); +} + +bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { + return (programCounters.find(registerID) != programCounters.end()); +} + +int EDDisassembler::printInst(std::string &str, MCInst &inst) { + PrinterMutex.acquire(); + + InstPrinter->printInst(&inst, *InstStream); + InstStream->flush(); + str = *InstString; + InstString->clear(); + + PrinterMutex.release(); + + return 0; +} + +int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, + SmallVectorImpl<AsmToken> &tokens, + const std::string &str) { + int ret = 0; + + switch (Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + case Triple::arm: + case Triple::thumb: + break; + } + + const char *cStr = str.c_str(); + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); + + StringRef instName; + SMLoc instLoc; + + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over + MCContext context(*AsmInfo); + OwningPtr<MCStreamer> streamer(createNullStreamer(context)); + OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr, + context, *streamer, + *AsmInfo)); + OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser, + *TargetMachine)); + + AsmToken OpcodeToken = genericParser->Lex(); + AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to + + if (OpcodeToken.is(AsmToken::Identifier)) { + instName = OpcodeToken.getString(); + instLoc = OpcodeToken.getLoc(); + + if (NextToken.isNot(AsmToken::Eof) && + TargetParser->ParseInstruction(instName, instLoc, operands)) + ret = -1; + } else { + ret = -1; + } + + ParserMutex.acquire(); + + if (!ret) { + GenericAsmLexer->setBuffer(buf); + + while (SpecificAsmLexer->Lex(), + SpecificAsmLexer->isNot(AsmToken::Eof) && + SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { + if (SpecificAsmLexer->is(AsmToken::Error)) { + ret = -1; + break; + } + tokens.push_back(SpecificAsmLexer->getTok()); + } + } + + ParserMutex.release(); + + return ret; +} + +int EDDisassembler::llvmSyntaxVariant() const { + return LLVMSyntaxVariant; +} diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h new file mode 100644 index 0000000000000..e2f850bcdba97 --- /dev/null +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -0,0 +1,271 @@ +//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// disassembler class. The disassembler is responsible for vending individual +// instructions according to a given architecture and disassembly syntax. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+  typedef enum {
+    /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+    kEDAssemblySyntaxX86Intel = 0,
+    /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+    kEDAssemblySyntaxX86ATT = 1,
+    kEDAssemblySyntaxARMUAL = 2
+  } AssemblySyntax;
+
+
+  ////////////////////
+  // Static members //
+  ////////////////////
+
+  /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+  /// pair
+  struct CPUKey {
+    /// The architecture type
+    llvm::Triple::ArchType Arch;
+
+    /// The assembly syntax
+    AssemblySyntax Syntax;
+
+    /// operator== - Equality operator
+    bool operator==(const CPUKey &key) const {
+      return (Arch == key.Arch &&
+              Syntax == key.Syntax);
+    }
+
+    /// operator< - Less-than operator; must be a strict weak ordering so that
+    /// distinct (Arch, Syntax) pairs never compare equivalent in the map
+    bool operator<(const CPUKey &key) const {
+      return Arch < key.Arch ||
+             (Arch == key.Arch && Syntax < key.Syntax);
+    }
+  };
+
+  typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+  /// True if the disassembler registry has been initialized; false if not
+  static bool sInitialized;
+  /// A map from disassembler specifications to disassemblers. Populated
+  /// lazily.
+  static DisassemblerMap_t sDisassemblers;
+
+  /// getDisassembler - Returns the specified disassembler, or NULL on failure
+  ///
+  /// @arg arch   - The desired architecture
+  /// @arg syntax - The desired disassembly syntax
+  static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+                                         AssemblySyntax syntax);
+
+  /// getDisassembler - Returns the disassembler for a given combination of
+  /// target triple and assembly syntax, or NULL on failure
+  ///
+  /// @arg str    - The string representation of the architecture triple, e.g.,
+  ///               "x86_64-apple-darwin"
+  /// @arg syntax - The disassembly syntax for the required disassembler
+  static EDDisassembler *getDisassembler(llvm::StringRef str,
+                                         AssemblySyntax syntax);
+
+  /// initialize - Initializes the disassembler registry and the LLVM backend
+  static void initialize();
+
+  ////////////////////////
+  // Per-object members //
+  ////////////////////////
+
+  /// True only if the object has been successfully initialized
+  bool Valid;
+  /// True if the disassembler can provide semantic information
+  bool HasSemantics;
+
+  /// The stream to write errors to
+  llvm::raw_ostream &ErrorStream;
+
+  /// The architecture/syntax pair for the current architecture
+  CPUKey Key;
+  /// The LLVM target corresponding to the disassembler
+  const llvm::Target *Tgt;
+  /// The target machine instance.
+  llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
+  /// The assembly information for the target architecture
+  llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+  /// The disassembler for the target architecture
+  llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+  /// The output string for the instruction printer; must be guarded with
+  /// PrinterMutex
+  llvm::OwningPtr<std::string> InstString;
+  /// The output stream for the disassembler; must be guarded with
+  /// PrinterMutex
+  llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+  /// The instruction printer for the target architecture; must be guarded with
+  /// PrinterMutex when printing
+  llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+  /// The mutex that guards the instruction printer's printing functions, which
+  /// use a shared stream
+  llvm::sys::Mutex PrinterMutex;
+  /// The array of instruction information provided by the TableGen backend for
+  /// the target architecture
+  const llvm::EDInstInfo *InstInfos;
+  /// The target-specific lexer for use in tokenizing strings, in
+  /// target-independent and target-specific portions
+  llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+  llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+  /// The guard for the above
+  llvm::sys::Mutex ParserMutex;
+  /// The LLVM number used for the target disassembly syntax variant
+  int LLVMSyntaxVariant;
+
+  typedef std::vector<std::string> regvec_t;
+  typedef std::map<std::string, unsigned> regrmap_t;
+
+  /// A vector of registers for quick mapping from LLVM register IDs to names
+  regvec_t RegVec;
+  /// A map of registers for quick mapping from register names to LLVM IDs
+  regrmap_t RegRMap;
+
+  /// A set of register IDs for aliases of the stack pointer for the current
+  /// architecture
+  std::set<unsigned> stackPointers;
+  /// A set of register IDs for aliases of the program counter for the current
+  /// architecture
+  std::set<unsigned> programCounters;
+
+  /// Constructor - initializes a disassembler with all the necessary objects,
+  /// which come pre-allocated from the registry accessor function
+  ///
+  /// @arg key - the architecture and disassembly syntax for the
+  ///            disassembler
+  EDDisassembler(CPUKey& key);
+
+  /// valid - reports whether there was a failure in the constructor.
+  bool valid() {
+    return Valid;
+  }
+
+  /// hasSemantics - reports whether the disassembler can provide operands and
+  /// tokens.
+  bool hasSemantics() {
+    return HasSemantics;
+  }
+
+  ~EDDisassembler();
+
+  /// createInst - creates and returns an instruction given a callback and
+  /// memory address, or NULL on failure
+  ///
+  /// @arg byteReader - A callback function that provides machine code bytes
+  /// @arg address    - The address of the first byte of the instruction,
+  ///                   suitable for passing to byteReader
+  /// @arg arg        - An opaque argument for byteReader
+  EDInst *createInst(EDByteReaderCallback byteReader,
+                     uint64_t address,
+                     void *arg);
+
+  /// initMaps - initializes regVec and regRMap using the provided register
+  /// info
+  ///
+  /// @arg registerInfo - the register information to use as a source
+  void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+  /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+  /// register for a given register ID, or NULL on failure
+  ///
+  /// @arg registerID - the ID of the register to be queried
+  const char *nameWithRegisterID(unsigned registerID) const;
+  /// registerIDWithName - Returns the ID of a register for a given register
+  /// name, or 0 on failure
+  ///
+  /// @arg name - The name of the register
+  unsigned registerIDWithName(const char *name) const;
+
+  /// registerIsStackPointer - reports whether a register ID is an alias for the
+  /// stack pointer register
+  ///
+  /// @arg registerID - The LLVM register ID
+  bool registerIsStackPointer(unsigned registerID);
+  /// registerIsProgramCounter - reports whether a register ID is an alias for
+  /// the program counter register
+  ///
+  /// @arg registerID - The LLVM register ID
+  bool registerIsProgramCounter(unsigned registerID);
+
+  /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+  /// otherwise
+  ///
+  /// @arg str  - A reference to a string which is filled in with the string
+  ///             representation of the instruction
+  /// @arg inst - A reference to the MCInst to be printed
+  int printInst(std::string& str,
+                llvm::MCInst& inst);
+
+  /// parseInst - extracts operands and tokens from a string for use in
+  /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+  ///
+  /// @arg operands - A reference to a vector that will be filled in with the
+  ///                 parsed operands
+  /// @arg tokens   - A reference to a vector that will be filled in with the
+  ///                 tokens
+  /// @arg str      - The string representation of the instruction
+  int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+                llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+                const std::string &str);
+
+  /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+  int llvmSyntaxVariant() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
new file mode 100644
index 0000000000000..627c06641dbc1
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInfo.h
@@ -0,0 +1,73 @@
+//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINFO_H +#define LLVM_EDINFO_H + +enum { + EDIS_MAX_OPERANDS = 13, + EDIS_MAX_SYNTAXES = 2 +}; + +enum OperandTypes { + kOperandTypeNone, + kOperandTypeImmediate, + kOperandTypeRegister, + kOperandTypeX86Memory, + kOperandTypeX86EffectiveAddress, + kOperandTypeX86PCRelative, + kOperandTypeARMBranchTarget, + kOperandTypeARMSoReg, + kOperandTypeARMSoImm, + kOperandTypeARMSoImm2Part, + kOperandTypeARMPredicate, + kOperandTypeARMAddrMode2, + kOperandTypeARMAddrMode2Offset, + kOperandTypeARMAddrMode3, + kOperandTypeARMAddrMode3Offset, + kOperandTypeARMAddrMode4, + kOperandTypeARMAddrMode5, + kOperandTypeARMAddrMode6, + kOperandTypeARMAddrMode6Offset, + kOperandTypeARMAddrModePC, + kOperandTypeARMRegisterList, + kOperandTypeARMTBAddrMode, + kOperandTypeThumbITMask, + kOperandTypeThumbAddrModeS1, + kOperandTypeThumbAddrModeS2, + kOperandTypeThumbAddrModeS4, + kOperandTypeThumbAddrModeRR, + kOperandTypeThumbAddrModeSP, + kOperandTypeThumb2SoReg, + kOperandTypeThumb2SoImm, + kOperandTypeThumb2AddrModeImm8, + kOperandTypeThumb2AddrModeImm8Offset, + kOperandTypeThumb2AddrModeImm12, + kOperandTypeThumb2AddrModeSoReg, + kOperandTypeThumb2AddrModeImm8s4, + kOperandTypeThumb2AddrModeImm8s4Offset +}; + +enum OperandFlags { + kOperandFlagSource = 0x1, + kOperandFlagTarget = 0x2 +}; + +enum InstructionTypes { + kInstructionTypeNone, + kInstructionTypeMove, + kInstructionTypeBranch, + kInstructionTypePush, + kInstructionTypePop, + kInstructionTypeCall, + kInstructionTypeReturn +}; + + +#endif diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp new file mode 100644 index 0000000000000..e22408f060b1f --- /dev/null +++ b/lib/MC/MCDisassembler/EDInst.cpp @@ -0,0 +1,207 @@ +//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's instruction class. +// The instruction is responsible for vending the string representation, +// individual tokens, and operands for a single instruction. 
+// +//===----------------------------------------------------------------------===// + +#include "EDInst.h" +#include "EDDisassembler.h" +#include "EDOperand.h" +#include "EDToken.h" + +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +EDInst::EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *info) : + Disassembler(disassembler), + Inst(inst), + ThisInstInfo(info), + ByteSize(byteSize), + BranchTarget(-1), + MoveSource(-1), + MoveTarget(-1) { + OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; +} + +EDInst::~EDInst() { + unsigned int index; + unsigned int numOperands = Operands.size(); + + for (index = 0; index < numOperands; ++index) + delete Operands[index]; + + unsigned int numTokens = Tokens.size(); + + for (index = 0; index < numTokens; ++index) + delete Tokens[index]; + + delete Inst; +} + +uint64_t EDInst::byteSize() { + return ByteSize; +} + +int EDInst::stringify() { + if (StringifyResult.valid()) + return StringifyResult.result(); + + if (Disassembler.printInst(String, *Inst)) + return StringifyResult.setResult(-1); + + return StringifyResult.setResult(0); +} + +int EDInst::getString(const char*& str) { + if (stringify()) + return -1; + + str = String.c_str(); + + return 0; +} + +unsigned EDInst::instID() { + return Inst->getOpcode(); +} + +bool EDInst::isBranch() { + if (ThisInstInfo) + return + ThisInstInfo->instructionType == kInstructionTypeBranch || + ThisInstInfo->instructionType == kInstructionTypeCall; + else + return false; +} + +bool EDInst::isMove() { + if (ThisInstInfo) + return ThisInstInfo->instructionType == kInstructionTypeMove; + else + return false; +} + +int EDInst::parseOperands() { + if (ParseResult.valid()) + return ParseResult.result(); + + if (!ThisInstInfo) + return ParseResult.setResult(-1); + + unsigned int opIndex; + unsigned int mcOpIndex = 0; + + for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) { + if (isBranch() && + (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) { + BranchTarget = opIndex; + } + else if (isMove()) { + if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource) + MoveSource = opIndex; + else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget) + MoveTarget = opIndex; + } + + EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex); + + Operands.push_back(operand); + } + + return ParseResult.setResult(0); +} + +int EDInst::branchTargetID() { + if (parseOperands()) + return -1; + return BranchTarget; +} + +int EDInst::moveSourceID() { + if (parseOperands()) + return -1; + return MoveSource; +} + +int EDInst::moveTargetID() { + if (parseOperands()) + return -1; + return MoveTarget; +} + +int EDInst::numOperands() { + if (parseOperands()) + return -1; + return Operands.size(); +} + +int EDInst::getOperand(EDOperand *&operand, unsigned int index) { + if (parseOperands()) + return -1; + + if (index >= Operands.size()) + return -1; + + operand = Operands[index]; + return 0; +} + +int EDInst::tokenize() { + if (TokenizeResult.valid()) + return TokenizeResult.result(); + + if (stringify()) + return TokenizeResult.setResult(-1); + + return TokenizeResult.setResult(EDToken::tokenize(Tokens, + String, + OperandOrder, + Disassembler)); + +} + +int EDInst::numTokens() { + if (tokenize()) + return -1; + return Tokens.size(); +} + +int EDInst::getToken(EDToken *&token, unsigned int index) { + if (tokenize()) + return -1; + token = Tokens[index]; + return 0; +} + 
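Taken together with createInst() in EDDisassembler.cpp, the accessors above define the intended client flow: create an instruction from a byte-reader callback, then query its string, operands, and tokens, each computed lazily and cached. A minimal sketch against those interfaces; the buffer handling and target choice here are illustrative assumptions, not part of the library:

    // Byte reader over a caller-owned buffer; 'arg' carries the buffer.
    // Illustration only: no bounds checking is performed.
    static int bufferByteReader(uint8_t *byte, uint64_t address, void *arg) {
      *byte = static_cast<uint8_t *>(arg)[address];
      return 0;
    }

    void printOneInstruction(uint8_t *code) {
      using namespace llvm;
      EDDisassembler::initialize();
      EDDisassembler *disasm =
        EDDisassembler::getDisassembler(Triple::x86_64,
                                        EDDisassembler::kEDAssemblySyntaxX86ATT);
      if (!disasm)
        return;
      if (EDInst *inst = disasm->createInst(bufferByteReader, 0, code)) {
        const char *str = 0;
        if (!inst->getString(str))
          outs() << str << "\n";  // str remains owned by the EDInst
        delete inst;              // also deletes the wrapped MCInst
      }
    }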
+#ifdef __BLOCKS__ +int EDInst::visitTokens(EDTokenVisitor_t visitor) { + if (tokenize()) + return -1; + + tokvec_t::iterator iter; + + for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) { + int ret = visitor(*iter); + if (ret == 1) + return 0; + if (ret != 0) + return -1; + } + + return 0; +} +#endif diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h new file mode 100644 index 0000000000000..39d264fb7aadc --- /dev/null +++ b/lib/MC/MCDisassembler/EDInst.h @@ -0,0 +1,182 @@ +//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// instruction class. The instruction is responsible for vending the string +// representation, individual tokens and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINST_H +#define LLVM_EDINST_H + +#include "llvm/System/DataTypes.h" +#include "llvm/ADT/SmallVector.h" +#include <string> +#include <vector> + +namespace llvm { + class MCInst; + struct EDInstInfo; + struct EDToken; + struct EDDisassembler; + struct EDOperand; + +#ifdef __BLOCKS__ + typedef int (^EDTokenVisitor_t)(EDToken *token); +#endif + +/// CachedResult - Encapsulates the result of a function along with the validity +/// of that result, so that slow functions don't need to run twice +struct CachedResult { + /// True if the result has been obtained by executing the function + bool Valid; + /// The result last obtained from the function + int Result; + + /// Constructor - Initializes an invalid result + CachedResult() : Valid(false) { } + /// valid - Returns true if the result has been obtained by executing the + /// function and false otherwise + bool valid() { return Valid; } + /// result - Returns the result of the function or an undefined value if + /// valid() is false + int result() { return Result; } + /// setResult - Sets the result of the function and declares it valid + /// returning the result (so that setResult() can be called from inside a + /// return statement) + /// @arg result - The result of the function + int setResult(int result) { Result = result; Valid = true; return result; } +}; + +/// EDInst - Encapsulates a single instruction, which can be queried for its +/// string representation, as well as its operands and tokens +struct EDInst { + /// The parent disassembler + EDDisassembler &Disassembler; + /// The containing MCInst + llvm::MCInst *Inst; + /// The instruction information provided by TableGen for this instruction + const llvm::EDInstInfo *ThisInstInfo; + /// The number of bytes for the machine code representation of the instruction + uint64_t ByteSize; + + /// The result of the stringify() function + CachedResult StringifyResult; + /// The string representation of the instruction + std::string String; + /// The order in which operands from the InstInfo's operand information appear + /// in String + const char* OperandOrder; + + /// The result of the parseOperands() function + CachedResult ParseResult; + typedef llvm::SmallVector<EDOperand*, 5> opvec_t; + /// The instruction's operands + opvec_t Operands; + /// The operand corresponding to the target, if the instruction is a branch + int BranchTarget; 
+  /// The operand corresponding to the source, if the instruction is a move
+  int MoveSource;
+  /// The operand corresponding to the target, if the instruction is a move
+  int MoveTarget;
+
+  /// The result of the tokenize() function
+  CachedResult TokenizeResult;
+  typedef std::vector<EDToken*> tokvec_t;
+  /// The instruction's tokens
+  tokvec_t Tokens;
+
+  /// Constructor - initializes an instruction given the output of the LLVM
+  /// C++ disassembler
+  ///
+  /// @arg inst         - The MCInst, which will now be owned by this object
+  /// @arg byteSize     - The size of the consumed instruction, in bytes
+  /// @arg disassembler - The parent disassembler
+  /// @arg instInfo     - The instruction information produced by the table
+  ///                     generator for this instruction
+  EDInst(llvm::MCInst *inst,
+         uint64_t byteSize,
+         EDDisassembler &disassembler,
+         const llvm::EDInstInfo *instInfo);
+  ~EDInst();
+
+  /// byteSize - returns the number of bytes consumed by the machine code
+  /// representation of the instruction
+  uint64_t byteSize();
+  /// instID - returns the LLVM instruction ID of the instruction
+  unsigned instID();
+
+  /// stringify - populates the String member of the instruction, returning 0
+  /// on success or -1 otherwise
+  int stringify();
+  /// getString - retrieves a pointer to the string representation of the
+  /// instruction, returning 0 on success or -1 otherwise
+  ///
+  /// @arg str - A reference to a pointer that, on success, is set to point to
+  /// the string representation of the instruction; this string is still owned
+  /// by the instruction and will be deleted when it is destroyed
+  int getString(const char *&str);
+
+  /// isBranch - Returns true if the instruction is a branch
+  bool isBranch();
+  /// isMove - Returns true if the instruction is a move
+  bool isMove();
+
+  /// parseOperands - populates the Operands member of the instruction,
+  /// returning 0 on success or -1 otherwise
+  int parseOperands();
+  /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+  /// the target operand if the instruction is a branch, or -1 otherwise
+  int branchTargetID();
+  /// moveSourceID - returns the ID of the source operand if the instruction
+  /// is a move, or -1 otherwise
+  int moveSourceID();
+  /// moveTargetID - returns the ID of the target operand if the instruction
+  /// is a move, or -1 otherwise
+  int moveTargetID();
+
+  /// numOperands - returns the number of operands available to retrieve, or -1
+  /// on error
+  int numOperands();
+  /// getOperand - retrieves an operand from the instruction's operand list by
+  /// index, returning 0 on success or -1 on error
+  ///
+  /// @arg operand - A reference whose target is pointed at the operand on
+  ///                success, although the operand is still owned by the EDInst
+  /// @arg index   - The index of the operand in the instruction
+  int getOperand(EDOperand *&operand, unsigned int index);
+
+  /// tokenize - populates the Tokens member of the instruction, returning 0 on
+  /// success or -1 otherwise
+  int tokenize();
+  /// numTokens - returns the number of tokens in the instruction, or -1 on
+  /// error
+  int numTokens();
+  /// getToken - retrieves a token from the instruction's token list by index,
+  /// returning 0 on success or -1 on error
+  ///
+  /// @arg token - A reference whose target is pointed at the token on success,
+  ///              although the token is still owned by the EDInst
+  /// @arg index - The index of the token in the instruction
+  int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+  ///
visitTokens - Visits each token in turn and applies a block to it, + /// returning 0 if all blocks are visited and/or the block signals + /// termination by returning 1; returns -1 on error + /// + /// @arg visitor - The visitor block to apply to all tokens. + int visitTokens(EDTokenVisitor_t visitor); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp new file mode 100644 index 0000000000000..2aed123368dad --- /dev/null +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -0,0 +1,282 @@ +//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's operand class. The +// operand is responsible for allowing evaluation given a particular register +// context. +// +//===----------------------------------------------------------------------===// + +#include "EDOperand.h" +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" +using namespace llvm; + +EDOperand::EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex) : + Disassembler(disassembler), + Inst(inst), + OpIndex(opIndex), + MCOpIndex(mcOpIndex) { + unsigned int numMCOperands = 0; + + if (Disassembler.Key.Arch == Triple::x86 || + Disassembler.Key.Arch == Triple::x86_64) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + break; + case kOperandTypeImmediate: + numMCOperands = 1; + break; + case kOperandTypeRegister: + numMCOperands = 1; + break; + case kOperandTypeX86Memory: + numMCOperands = 5; + break; + case kOperandTypeX86EffectiveAddress: + numMCOperands = 4; + break; + case kOperandTypeX86PCRelative: + numMCOperands = 1; + break; + } + } + else if (Disassembler.Key.Arch == Triple::arm || + Disassembler.Key.Arch == Triple::thumb) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + case kOperandTypeARMRegisterList: + break; + case kOperandTypeImmediate: + case kOperandTypeRegister: + case kOperandTypeARMBranchTarget: + case kOperandTypeARMSoImm: + case kOperandTypeThumb2SoImm: + case kOperandTypeARMSoImm2Part: + case kOperandTypeARMPredicate: + case kOperandTypeThumbITMask: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeARMTBAddrMode: + case kOperandTypeThumb2AddrModeImm8s4Offset: + numMCOperands = 1; + break; + case kOperandTypeThumb2SoReg: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrModePC: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + numMCOperands = 2; + break; + case kOperandTypeARMSoReg: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode3: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeARMAddrMode6Offset: + numMCOperands = 3; + break; + case kOperandTypeARMAddrMode6: + numMCOperands 
= 4;
+      break;
+    }
+  }
+
+  mcOpIndex += numMCOperands;
+}
+
+EDOperand::~EDOperand() {
+}
+
+int EDOperand::evaluate(uint64_t &result,
+                        EDRegisterReaderCallback callback,
+                        void *arg) {
+  uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+  switch (Disassembler.Key.Arch) {
+  default:
+    return -1;
+  case Triple::x86:
+  case Triple::x86_64:
+    switch (operandType) {
+    default:
+      return -1;
+    case kOperandTypeImmediate:
+      result = Inst.Inst->getOperand(MCOpIndex).getImm();
+      return 0;
+    case kOperandTypeRegister:
+    {
+      unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+      return callback(&result, reg, arg);
+    }
+    case kOperandTypeX86PCRelative:
+    {
+      int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+      uint64_t ripVal;
+
+      // TODO fix how we do this
+
+      if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+        return -1;
+
+      result = ripVal + displacement;
+      return 0;
+    }
+    case kOperandTypeX86Memory:
+    case kOperandTypeX86EffectiveAddress:
+    {
+      unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+      uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+      unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+      int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+      //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+
+      uint64_t addr = 0;
+
+      if (baseReg) {
+        uint64_t baseVal;
+        if (callback(&baseVal, baseReg, arg))
+          return -1;
+        addr += baseVal;
+      }
+
+      if (indexReg) {
+        uint64_t indexVal;
+        if (callback(&indexVal, indexReg, arg))
+          return -1;
+        addr += (scaleAmount * indexVal);
+      }
+
+      addr += displacement;
+
+      result = addr;
+      return 0;
+    }
+    }
+    break;
+  case Triple::arm:
+  case Triple::thumb:
+    switch (operandType) {
+    default:
+      return -1;
+    case kOperandTypeImmediate:
+      result = Inst.Inst->getOperand(MCOpIndex).getImm();
+      return 0;
+    case kOperandTypeRegister:
+    {
+      unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+      return callback(&result, reg, arg);
+    }
+    case kOperandTypeARMBranchTarget:
+    {
+      int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+      uint64_t pcVal;
+
+      if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+        return -1;
+
+      result = pcVal + displacement;
+      return 0;
+    }
+    }
+  }
+
+  return -1;
+}
+
+int EDOperand::isRegister() {
+  return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister);
+}
+
+unsigned EDOperand::regVal() {
+  return Inst.Inst->getOperand(MCOpIndex).getReg();
+}
+
+int EDOperand::isImmediate() {
+  return(Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate);
+}
+
+uint64_t EDOperand::immediateVal() {
+  return Inst.Inst->getOperand(MCOpIndex).getImm();
+}
+
+int EDOperand::isMemory() {
+  uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+  switch (operandType) {
+  default:
+    return 0;
+  case kOperandTypeX86Memory:
+  case kOperandTypeX86PCRelative:
+  case kOperandTypeX86EffectiveAddress:
+  case kOperandTypeARMSoReg:
+  case kOperandTypeARMSoImm:
+  case kOperandTypeARMAddrMode2:
+  case kOperandTypeARMAddrMode2Offset:
+  case kOperandTypeARMAddrMode3:
+  case kOperandTypeARMAddrMode3Offset:
+  case kOperandTypeARMAddrMode4:
+  case kOperandTypeARMAddrMode5:
+  case kOperandTypeARMAddrMode6:
+  case kOperandTypeARMAddrModePC:
+  case kOperandTypeARMBranchTarget:
+  case kOperandTypeThumbAddrModeS1:
+  case kOperandTypeThumbAddrModeS2:
+  case kOperandTypeThumbAddrModeS4:
+  case kOperandTypeThumbAddrModeRR:
+  case kOperandTypeThumbAddrModeSP:
+  case kOperandTypeThumb2SoImm:
case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumb2AddrModeImm8s4: + return 1; + } +} + +#ifdef __BLOCKS__ +struct RegisterReaderWrapper { + EDOperand::EDRegisterBlock_t regBlock; +}; + +int readerWrapperCallback(uint64_t *value, + unsigned regID, + void *arg) { + struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg; + return wrapper->regBlock(value, regID); +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterBlock_t regBlock) { + struct RegisterReaderWrapper wrapper; + wrapper.regBlock = regBlock; + return evaluate(result, + readerWrapperCallback, + (void*)&wrapper); +} +#endif diff --git a/lib/MC/MCDisassembler/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h new file mode 100644 index 0000000000000..6e695224318c5 --- /dev/null +++ b/lib/MC/MCDisassembler/EDOperand.h @@ -0,0 +1,91 @@ +//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// operand class. The operand is responsible for allowing evaluation given a +// particular register context. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDOPERAND_H +#define LLVM_EDOPERAND_H + +#include "llvm/System/DataTypes.h" + +namespace llvm { + +struct EDDisassembler; +struct EDInst; + +typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, + void* arg); + + +/// EDOperand - Encapsulates a single operand, which can be evaluated by the +/// client +struct EDOperand { + /// The parent disassembler + const EDDisassembler &Disassembler; + /// The parent instruction + const EDInst &Inst; + + /// The index of the operand in the EDInst + unsigned int OpIndex; + /// The index of the first component of the operand in the MCInst + unsigned int MCOpIndex; + + /// Constructor - Initializes an EDOperand + /// + /// @arg disassembler - The disassembler responsible for the operand + /// @arg inst - The instruction containing this operand + /// @arg opIndex - The index of the operand in inst + /// @arg mcOpIndex - The index of the operand in the original MCInst + EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex); + ~EDOperand(); + + /// evaluate - Returns the numeric value of an operand to the extent possible, + /// returning 0 on success or -1 if there was some problem (such as a + /// register not being readable) + /// + /// @arg result - A reference whose target is filled in with the value of + /// the operand (the address if it is a memory operand) + /// @arg callback - A function to call to obtain register values + /// @arg arg - An opaque argument to pass to callback + int evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg); + + /// isRegister - Returns 1 if the operand is a register or 0 otherwise + int isRegister(); + /// regVal - Returns the register value. + unsigned regVal(); + + /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise + int isImmediate(); + /// immediateVal - Returns the immediate value. 
+ uint64_t immediateVal(); + + /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise + int isMemory(); + +#ifdef __BLOCKS__ + typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); + + /// evaluate - Like evaluate for a callback, but uses a block instead + int evaluate(uint64_t &result, + EDRegisterBlock_t regBlock); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp new file mode 100644 index 0000000000000..400e1649e9709 --- /dev/null +++ b/lib/MC/MCDisassembler/EDToken.cpp @@ -0,0 +1,206 @@ +//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembler library's token class. The +// token is responsible for vending information about the token, such as its +// type and logical value. +// +//===----------------------------------------------------------------------===// + +#include "EDToken.h" +#include "EDDisassembler.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +EDToken::EDToken(StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler) : + Disassembler(disassembler), + Str(str), + Type(type), + LocalType(localType), + OperandID(-1) { +} + +EDToken::~EDToken() { +} + +void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { + Type = kTokenLiteral; + LiteralSign = sign; + LiteralAbsoluteValue = absoluteValue; +} + +void EDToken::makeRegister(unsigned registerID) { + Type = kTokenRegister; + RegisterID = registerID; +} + +void EDToken::setOperandID(int operandID) { + OperandID = operandID; +} + +enum EDToken::tokenType EDToken::type() const { + return Type; +} + +uint64_t EDToken::localType() const { + return LocalType; +} + +StringRef EDToken::string() const { + return Str; +} + +int EDToken::operandID() const { + return OperandID; +} + +int EDToken::literalSign() const { + if (Type != kTokenLiteral) + return -1; + return (LiteralSign ? 
1 : 0); +} + +int EDToken::literalAbsoluteValue(uint64_t &value) const { + if (Type != kTokenLiteral) + return -1; + value = LiteralAbsoluteValue; + return 0; +} + +int EDToken::registerID(unsigned ®isterID) const { + if (Type != kTokenRegister) + return -1; + registerID = RegisterID; + return 0; +} + +int EDToken::tokenize(std::vector<EDToken*> &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler) { + SmallVector<MCParsedAsmOperand*, 5> parsedOperands; + SmallVector<AsmToken, 10> asmTokens; + + if (disassembler.parseInst(parsedOperands, asmTokens, str)) + return -1; + + SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator; + unsigned int operandIndex; + SmallVectorImpl<AsmToken>::iterator tokenIterator; + + operandIterator = parsedOperands.begin(); + operandIndex = 0; + + bool readOpcode = false; + + const char *wsPointer = asmTokens.begin()->getLoc().getPointer(); + + for (tokenIterator = asmTokens.begin(); + tokenIterator != asmTokens.end(); + ++tokenIterator) { + SMLoc tokenLoc = tokenIterator->getLoc(); + + const char *tokenPointer = tokenLoc.getPointer(); + + if (tokenPointer > wsPointer) { + unsigned long wsLength = tokenPointer - wsPointer; + + EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength), + EDToken::kTokenWhitespace, + 0, + disassembler); + + tokens.push_back(whitespaceToken); + } + + wsPointer = tokenPointer + tokenIterator->getString().size(); + + while (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() > + (*operandIterator)->getEndLoc().getPointer()) { + ++operandIterator; + ++operandIndex; + } + + EDToken *token; + + switch (tokenIterator->getKind()) { + case AsmToken::Identifier: + if (!readOpcode) { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenOpcode, + (uint64_t)tokenIterator->getKind(), + disassembler); + readOpcode = true; + break; + } + // any identifier that isn't an opcode is mere punctuation; so we fall + // through + default: + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenPunctuation, + (uint64_t)tokenIterator->getKind(), + disassembler); + break; + case AsmToken::Integer: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + int64_t intVal = tokenIterator->getIntVal(); + + if (intVal < 0) + token->makeLiteral(true, -intVal); + else + token->makeLiteral(false, intVal); + break; + } + case AsmToken::Register: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + token->makeRegister((unsigned)tokenIterator->getRegVal()); + break; + } + } + + if (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() >= + (*operandIterator)->getStartLoc().getPointer()) { + /// operandIndex == 0 means the operand is the instruction (which the + /// AsmParser treats as an operand but edis does not). We therefore skip + /// operandIndex == 0 and subtract 1 from all other operand indices. 
+ + if (operandIndex > 0) + token->setOperandID(operandOrder[operandIndex - 1]); + } + + tokens.push_back(token); + } + + return 0; +} + +int EDToken::getString(const char*& buf) { + if (PermStr.length() == 0) { + PermStr = Str.str(); + } + buf = PermStr.c_str(); + return 0; +} diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h new file mode 100644 index 0000000000000..6b2aeac60ba51 --- /dev/null +++ b/lib/MC/MCDisassembler/EDToken.h @@ -0,0 +1,139 @@ +//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's token +// class. The token is responsible for vending information about the token, +// such as its type and logical value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDTOKEN_H +#define LLVM_EDTOKEN_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/System/DataTypes.h" +#include <string> +#include <vector> + +namespace llvm { + +struct EDDisassembler; + +/// EDToken - Encapsulates a single token, which can provide a string +/// representation of itself or interpret itself in various ways, depending +/// on the token type. +struct EDToken { + enum tokenType { + kTokenWhitespace, + kTokenOpcode, + kTokenLiteral, + kTokenRegister, + kTokenPunctuation + }; + + /// The parent disassembler + EDDisassembler &Disassembler; + + /// The token's string representation + llvm::StringRef Str; + /// The token's string representation, but in a form suitable for export + std::string PermStr; + /// The type of the token, as exposed through the external API + enum tokenType Type; + /// The type of the token, as recorded by the syntax-specific tokenizer + uint64_t LocalType; + /// The operand corresponding to the token, or (unsigned int)-1 if not + /// part of an operand. 
+ int OperandID; + + /// The sign if the token is a literal (1 if negative, 0 otherwise) + bool LiteralSign; + /// The absolute value if the token is a literal + uint64_t LiteralAbsoluteValue; + /// The LLVM register ID if the token is a register name + unsigned RegisterID; + + /// Constructor - Initializes an EDToken with the information common to all + /// tokens + /// + /// @arg str - The string corresponding to the token + /// @arg type - The token's type as exposed through the public API + /// @arg localType - The token's type as recorded by the tokenizer + /// @arg disassembler - The disassembler responsible for the token + EDToken(llvm::StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler); + + /// makeLiteral - Adds the information specific to a literal + /// @arg sign - The sign of the literal (1 if negative, 0 + /// otherwise) + /// + /// @arg absoluteValue - The absolute value of the literal + void makeLiteral(bool sign, uint64_t absoluteValue); + /// makeRegister - Adds the information specific to a register + /// + /// @arg registerID - The LLVM register ID + void makeRegister(unsigned registerID); + + /// setOperandID - Links the token to a numbered operand + /// + /// @arg operandID - The operand ID to link to + void setOperandID(int operandID); + + ~EDToken(); + + /// type - Returns the public type of the token + enum tokenType type() const; + /// localType - Returns the tokenizer-specific type of the token + uint64_t localType() const; + /// string - Returns the string representation of the token + llvm::StringRef string() const; + /// operandID - Returns the operand ID of the token + int operandID() const; + + /// literalSign - Returns the sign of the token + /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal) + int literalSign() const; + /// literalAbsoluteValue - Retrieves the absolute value of the token, and + /// returns -1 if the token is not a literal + /// @arg value - A reference to a value that is filled in with the absolute + /// value, if it is valid + int literalAbsoluteValue(uint64_t &value) const; + /// registerID - Retrieves the register ID of the token, and returns -1 if the + /// token is not a register + /// + /// @arg registerID - A reference to a value that is filled in with the + /// register ID, if it is valid + int registerID(unsigned ®isterID) const; + + /// tokenize - Tokenizes a string using the platform- and syntax-specific + /// tokenizer, and returns 0 on success (-1 on failure) + /// + /// @arg tokens - A vector that will be filled in with pointers to + /// allocated tokens + /// @arg str - The string, as outputted by the AsmPrinter + /// @arg operandOrder - The order of the operands from the operandFlags array + /// as they appear in str + /// @arg disassembler - The disassembler for the desired target and + // assembly syntax + static int tokenize(std::vector<EDToken*> &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler); + + /// getString - Directs a character pointer to the string, returning 0 on + /// success (-1 on failure) + /// @arg buf - A reference to a pointer that is set to point to the string. + /// The string is still owned by the token. 
+ int getString(const char*& buf); +}; + +} // end namespace llvm +#endif diff --git a/lib/Target/MSIL/Makefile b/lib/MC/MCDisassembler/Makefile index 70eadb32e3606..7d71cd381a7c3 100644 --- a/lib/Target/MSIL/Makefile +++ b/lib/MC/MCDisassembler/Makefile @@ -1,16 +1,14 @@ -##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure +##===- lib/MC/MCDisassembler/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. -# +# ##===----------------------------------------------------------------------===## LEVEL = ../../.. -LIBRARYNAME = LLVMMSIL -DIRS = TargetInfo +LIBRARYNAME = LLVMMCDisassembler include $(LEVEL)/Makefile.common -CompileCommonOpts := $(CompileCommonOpts) -Wno-format diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp new file mode 100644 index 0000000000000..2da71f96c676a --- /dev/null +++ b/lib/MC/MCDwarf.cpp @@ -0,0 +1,21 @@ +//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +void MCDwarfFile::print(raw_ostream &OS) const { + OS << '"' << getName() << '"'; +} + +void MCDwarfFile::dump() const { + print(dbgs()); +} diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp new file mode 100644 index 0000000000000..570c3917ab46f --- /dev/null +++ b/lib/MC/MCELFStreamer.cpp @@ -0,0 +1,408 @@ +//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. 
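+//
+// A rough usage sketch: the streamer is normally obtained through the
+// createELFStreamer() factory defined at the bottom of this file, then
+// driven like any MCStreamer. The names Ctx, TAB, OS, Emitter, TextSec,
+// and Inst are assumed to be in scope:
+//
+//   MCStreamer *S = createELFStreamer(Ctx, TAB, OS, Emitter, false);
+//   S->SwitchSection(TextSec);
+//   S->EmitInstruction(Inst);
+//   S->Finish();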
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" + +using namespace llvm; + +namespace { + +class MCELFStreamer : public MCObjectStreamer { + void EmitInstToFragment(const MCInst &Inst); + void EmitInstToData(const MCInst &Inst); +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + DEBUG(dbgs() << "FIXME: MCELFStreamer:EmitDwarfFileDirective not implemented\n"); + } + + virtual void EmitInstruction(const MCInst &Inst); + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. 
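+
+// Aside: of the ELF-specific directives declared above, .size is one of the
+// few actually implemented rather than asserted on. A hedged sketch of how a
+// client might drive it, with Sym, End, and Ctx as hypothetical names for a
+// symbol, its end label, and the MCContext:
+//
+//   const MCExpr *SizeExpr = MCBinaryExpr::CreateSub(
+//       MCSymbolRefExpr::Create(End, Ctx),
+//       MCSymbolRefExpr::Create(Sym, Ctx), Ctx);
+//   S->EmitELFSize(Sym, SizeExpr);   // equivalent to: .size Sym, End-Sym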
+
+void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+  // FIXME: This is wasteful, we don't necessarily need to create a data
+  // fragment. Instead, we should mark the symbol as pointing into the data
+  // fragment if it exists, otherwise we should just queue the label and set its
+  // fragment pointer when we emit the next fragment.
+  MCDataFragment *F = getOrCreateDataFragment();
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+  SD.setFragment(F);
+  SD.setOffset(F->getContents().size());
+
+  Symbol->setSection(*CurSection);
+}
+
+void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+  switch (Flag) {
+  case MCAF_SubsectionsViaSymbols:
+    getAssembler().setSubsectionsViaSymbols(true);
+    return;
+  }
+
+  assert(0 && "invalid assembler flag!");
+}
+
+void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
+  // FIXME: Lift context changes into super class.
+  getAssembler().getOrCreateSymbolData(*Symbol);
+  Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+                                        MCSymbolAttr Attribute) {
+  // Indirect symbols are handled differently, to match how 'as' handles
+  // them. This makes writing matching .o files easier.
+  if (Attribute == MCSA_IndirectSymbol) {
+    // Note that we intentionally cannot use the symbol data here; this is
+    // important for matching the string table that 'as' generates.
+    IndirectSymbolData ISD;
+    ISD.Symbol = Symbol;
+    ISD.SectionData = getCurrentSectionData();
+    getAssembler().getIndirectSymbols().push_back(ISD);
+    return;
+  }
+
+  // Adding a symbol attribute always introduces the symbol; note that an
+  // important side effect of calling getOrCreateSymbolData here is to register
+  // the symbol with the assembler.
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+  // The implementation of symbol attributes is designed to match 'as', but it
+  // leaves much to be desired. It doesn't really make sense to arbitrarily add
+  // and remove flags, but 'as' allows this (in particular, see .desc).
+  //
+  // In the future it might be worth trying to make these operations better
+  // defined.
+ switch (Attribute) { + case MCSA_LazyReference: + case MCSA_Reference: + case MCSA_NoDeadStrip: + case MCSA_PrivateExtern: + case MCSA_WeakDefinition: + case MCSA_WeakDefAutoPrivate: + case MCSA_Invalid: + case MCSA_ELF_TypeIndFunction: + case MCSA_IndirectSymbol: + assert(0 && "Invalid symbol attribute for ELF!"); + break; + + case MCSA_Global: + SD.setFlags(SD.getFlags() | ELF_STB_Global); + SD.setExternal(true); + break; + + case MCSA_WeakReference: + case MCSA_Weak: + SD.setFlags(SD.getFlags() | ELF_STB_Weak); + break; + + case MCSA_Local: + SD.setFlags(SD.getFlags() | ELF_STB_Local); + break; + + case MCSA_ELF_TypeFunction: + SD.setFlags(SD.getFlags() | ELF_STT_Func); + break; + + case MCSA_ELF_TypeObject: + SD.setFlags(SD.getFlags() | ELF_STT_Object); + break; + + case MCSA_ELF_TypeTLS: + SD.setFlags(SD.getFlags() | ELF_STT_Tls); + break; + + case MCSA_ELF_TypeCommon: + SD.setFlags(SD.getFlags() | ELF_STT_Common); + break; + + case MCSA_ELF_TypeNoType: + SD.setFlags(SD.getFlags() | ELF_STT_Notype); + break; + + case MCSA_Protected: + SD.setFlags(SD.getFlags() | ELF_STV_Protected); + break; + + case MCSA_Hidden: + SD.setFlags(SD.getFlags() | ELF_STV_Hidden); + break; + + case MCSA_Internal: + SD.setFlags(SD.getFlags() | ELF_STV_Internal); + break; + } +} + +void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + if ((SD.getFlags() & (0xf << ELF_STB_Shift)) == ELF_STB_Local) { + const MCSection *Section = getAssembler().getContext().getELFSection(".bss", + MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); + SD.setFragment(F); + Symbol->setSection(*Section); + SD.setSize(MCConstantExpr::Create(Size, getContext())); + } + + SD.setFlags(SD.getFlags() | ELF_STB_Global); + SD.setExternal(true); + + SD.setCommon(Size, ByteAlignment); +} + +void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCELFStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + MCDataFragment *DF = getOrCreateDataFragment(); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. + for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->addFixup(MCFixup::Create(DF->getContents().size(), AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } +} + +void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. 
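+  // (The MCAlignFragment created above only reserves padding bytes inside
+  // the section; the section's own alignment attribute is tracked separately
+  // on the MCSectionData, which is why it is raised explicitly below.)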
+ if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCELFStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + // TODO: This is exactly the same as MCMachOStreamer. Consider merging into + // MCObjectStreamer. + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); +} + +// Add a symbol for the file name of this module. This is the second +// entry in the module's symbol table (the first being the null symbol). +void MCELFStreamer::EmitFileDirective(StringRef Filename) { + MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename); + Symbol->setSection(*CurSection); + Symbol->setAbsolute(); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default); +} + +void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); + + // Add the fixups and data. + // + // FIXME: Revisit this design decision when relaxation is done, we may be + // able to get away with not storing any extra data in the MCInst. + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + IF->getCode() = Code; + IF->getFixups() = Fixups; +} + +void MCELFStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); + + SmallVector<MCFixup, 4> Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); +} + +void MCELFStreamer::EmitInstruction(const MCInst &Inst) { + // Scan for values. + for (unsigned i = 0; i != Inst.getNumOperands(); ++i) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + // If this instruction doesn't need relaxation, just emit it as data. + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { + EmitInstToData(Inst); + return; + } + + // Otherwise, if we are relaxing everything, relax the instruction as much as + // possible and emit it as data. + if (getAssembler().getRelaxAll()) { + MCInst Relaxed; + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); + EmitInstToData(Relaxed); + return; + } + + // Otherwise emit to a separate fragment. 
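+  // (An MCInstFragment keeps the MCInst together with its current encoding
+  // and fixups, so the assembler can re-encode it during relaxation; a plain
+  // data fragment would freeze the bytes.)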
+ EmitInstToFragment(Inst); +} + +void MCELFStreamer::Finish() { + getAssembler().Finish(); +} + +MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 44bc267c11c29..671874df2c69e 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -18,6 +18,8 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmBackend.h" @@ -28,58 +30,19 @@ namespace { class MCMachOStreamer : public MCObjectStreamer { private: - MCFragment *getCurrentFragment() const { - assert(getCurrentSectionData() && "No current section!"); - - if (!getCurrentSectionData()->empty()) - return &getCurrentSectionData()->getFragmentList().back(); - - return 0; - } - - /// Get a data fragment to write into, creating a new one if the current - /// fragment is not a data fragment. - MCDataFragment *getOrCreateDataFragment() const { - MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); - if (!F) - F = new MCDataFragment(getCurrentSectionData()); - return F; - } - void EmitInstToFragment(const MCInst &Inst); void EmitInstToData(const MCInst &Inst); + // FIXME: These will likely moved to a better place. + void MakeLineEntryForSection(const MCSection *Section); + const MCExpr * MakeStartMinusEndExpr(MCSymbol *Start, MCSymbol *End, + int IntVal); + void EmitDwarfFileTable(void); public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter) : MCObjectStreamer(Context, TAB, OS, Emitter) {} - const MCExpr *AddValueSymbols(const MCExpr *Value) { - switch (Value->getKind()) { - case MCExpr::Target: assert(0 && "Can't handle target exprs yet!"); - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols(BE->getLHS()); - AddValueSymbols(BE->getRHS()); - break; - } - - case MCExpr::SymbolRef: - getAssembler().getOrCreateSymbolData( - cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr()); - break; - } - - return Value; - } - /// @name MCStreamer Interface /// @{ @@ -126,10 +89,16 @@ public: unsigned char Value = 0); virtual void EmitFileDirective(StringRef Filename) { - report_fatal_error("unsupported directive: '.file'"); + // FIXME: Just ignore the .file; it isn't important enough to fail the + // entire assembly. + + //report_fatal_error("unsupported directive: '.file'"); } virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { - report_fatal_error("unsupported directive: '.file'"); + // FIXME: Just ignore the .file; it isn't important enough to fail the + // entire assembly. + + //report_fatal_error("unsupported directive: '.file'"); } virtual void EmitInstruction(const MCInst &Inst); @@ -142,6 +111,8 @@ public: } // end anonymous namespace. void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { + // TODO: This is almost exactly the same as WinCOFFStreamer. Consider merging + // into MCObjectStreamer. 
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
   assert(CurSection && "Cannot emit before setting section!");
@@ -185,6 +156,8 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
 }
 
 void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
   // FIXME: Lift context changes into super class.
   getAssembler().getOrCreateSymbolData(*Symbol);
   Symbol->setVariableValue(AddValueSymbols(Value));
@@ -335,11 +308,15 @@ void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
 }
 
 void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
   getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
 }
 
 void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
                                 unsigned AddrSpace) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
   MCDataFragment *DF = getOrCreateDataFragment();
 
   // Avoid fixups when possible.
@@ -359,6 +336,8 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
 void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                            int64_t Value, unsigned ValueSize,
                                            unsigned MaxBytesToEmit) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
   new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
@@ -371,6 +350,8 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
 void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
                                         unsigned MaxBytesToEmit) {
+  // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+  // MCObjectStreamer.
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
   MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
@@ -429,6 +410,10 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
 
   getCurrentSectionData()->setHasInstructions(true);
 
+  // Now that a machine instruction has been assembled into this section, make
+  // a line entry for any .loc directive that has been seen.
+  MakeLineEntryForSection(getCurrentSection());
+
   // If this instruction doesn't need relaxation, just emit it as data.
   if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
     EmitInstToData(Inst);
@@ -450,7 +435,207 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
   EmitInstToFragment(Inst);
 }
 
+//
+// This is called when an instruction is assembled into the specified section.
+// If the last .loc directive has information for which no line entry has yet
+// been made, a line entry is made for it here.
+//
+void MCMachOStreamer::MakeLineEntryForSection(const MCSection *Section) {
+  if (!getContext().getDwarfLocSeen())
+    return;
+
+  // Create a symbol in the current section for use in the line entry.
+  MCSymbol *LineSym = getContext().CreateTempSymbol();
+  // Set the value of the symbol to use for the MCLineEntry.
+  EmitLabel(LineSym);
+
+  // Get the current .loc info saved in the context.
+  const MCDwarfLoc &DwarfLoc = getContext().getCurrentDwarfLoc();
+
+  // Create a (local) line entry with the symbol and the current .loc info.
+  MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+  // Clear DwarfLocSeen, noting that the current .loc info has now been used.
+  getContext().clearDwarfLocSeen();
+
+  // Get the MCLineSection for this section; if one does not exist for this
+  // section, create it.
+  DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+    getContext().getMCLineSections();
+  MCLineSection *LineSection = MCLineSections[Section];
+  if (!LineSection) {
+    // Create a new MCLineSection. This will be deleted after the dwarf line
+    // table is created using it by iterating through the MCLineSections
+    // DenseMap.
+    LineSection = new MCLineSection;
+    // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+    MCLineSections[Section] = LineSection;
+  }
+
+  // Add the line entry to this section's entries.
+  LineSection->addLineEntry(LineEntry);
+}
+
+//
+// This helper routine returns an expression of End - Start - IntVal for use
+// by EmitDwarfFileTable() below.
+//
+const MCExpr * MCMachOStreamer::MakeStartMinusEndExpr(MCSymbol *Start,
+                                                      MCSymbol *End,
+                                                      int IntVal) {
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  const MCExpr *Res =
+    MCSymbolRefExpr::Create(End, Variant, getContext());
+  const MCExpr *RHS =
+    MCSymbolRefExpr::Create(Start, Variant, getContext());
+  const MCExpr *Res1 =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, getContext());
+  const MCExpr *Res2 =
+    MCConstantExpr::Create(IntVal, getContext());
+  const MCExpr *Res3 =
+    MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, getContext());
+  return Res3;
+}
+
+//
+// This emits the Dwarf file (and eventually the line) table.
+//
+void MCMachOStreamer::EmitDwarfFileTable(void) {
+  // For now make sure we don't put out the Dwarf file table if no .file
+  // directives were seen.
+  const std::vector<MCDwarfFile *> &MCDwarfFiles =
+    getContext().getMCDwarfFiles();
+  if (MCDwarfFiles.size() == 0)
+    return;
+
+  // This is the Mach-O section; for ELF it is the .debug_line section.
+  SwitchSection(getContext().getMachOSection("__DWARF", "__debug_line",
+                                             MCSectionMachO::S_ATTR_DEBUG,
+                                             0, SectionKind::getDataRelLocal()));
+
+  // Create a symbol at the beginning of this section.
+  MCSymbol *LineStartSym = getContext().CreateTempSymbol();
+  // Set the value of the symbol, as we are at the start of the section.
+  EmitLabel(LineStartSym);
+
+  // Create a symbol for the end of the section (to be set when we get there).
+  MCSymbol *LineEndSym = getContext().CreateTempSymbol();
+
+  // The first 4 bytes are the total length of the information for this
+  // compilation unit (not including these 4 bytes for the length).
+  EmitValue(MakeStartMinusEndExpr(LineStartSym, LineEndSym, 4), 4, 0);
+
+  // The next 2 bytes are the version, which is Dwarf 2.
+  EmitIntValue(2, 2);
+
+  // Create a symbol for the end of the prologue (to be set when we get there).
+  MCSymbol *ProEndSym = getContext().CreateTempSymbol(); // Lprologue_end
+
+  // The next 4 bytes are the length of the prologue: from the start of the
+  // section to the end of the prologue, not including the 4 bytes for the
+  // total length, the 2 bytes for the version, or these 4 bytes for the
+  // length of the prologue itself.
+  EmitValue(MakeStartMinusEndExpr(LineStartSym, ProEndSym, (4 + 2 + 4)), 4, 0);
+
+  // The parameters of the state machine are next.
+  // Define the architecture-dependent minimum instruction length (in
+  // bytes). This value should rather be too small than too big.
+  // DWARF2_LINE_MIN_INSN_LENGTH
+  EmitIntValue(1, 1);
+  // Flag that indicates the initial value of the is_stmt flag.
+  // DWARF2_LINE_DEFAULT_IS_STMT
+  EmitIntValue(1, 1);
+  // Minimum line offset in a special line info opcode. This value
+  // was chosen to give a reasonable range of values.
+  // DWARF2_LINE_BASE
+  EmitIntValue(uint64_t(-5), 1);
+  // Range of line offsets in a special line info opcode.
+  // DWARF2_LINE_RANGE
+  EmitIntValue(14, 1);
+  // First special line opcode - leave room for the standard opcodes.
+  // DWARF2_LINE_OPCODE_BASE
+  EmitIntValue(13, 1);
+
+  // Standard opcode lengths
+  EmitIntValue(0, 1); // length of DW_LNS_copy
+  EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+  EmitIntValue(1, 1); // length of DW_LNS_advance_line
+  EmitIntValue(1, 1); // length of DW_LNS_set_file
+  EmitIntValue(1, 1); // length of DW_LNS_set_column
+  EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+  EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+  EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+  EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+  EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+  EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+  EmitIntValue(1, 1); // length of DW_LNS_set_isa
+
+  // Put out the directory and file tables.
+
+  // First, the directory table.
+  const std::vector<StringRef> &MCDwarfDirs =
+    getContext().getMCDwarfDirs();
+  for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+    EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+    EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
+  }
+  EmitIntValue(0, 1); // Terminate the directory list
+
+  // Second, the file table.
+  for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+    EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+    EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
+    // FIXME: the directory number should be a .uleb128, not a .byte
+    EmitIntValue(MCDwarfFiles[i]->getDirIndex(), 1);
+    EmitIntValue(0, 1); // last modification timestamp (always 0)
+    EmitIntValue(0, 1); // filesize (always 0)
+  }
+  EmitIntValue(0, 1); // Terminate the file list
+
+  // This is the end of the prologue, so set the value of the symbol at the
+  // end of the prologue (that was used in a previous expression).
+  EmitLabel(ProEndSym);
+
+  // TODO: This is the point where the line tables would be emitted.
+
+  // Delete the MCLineSections that were created in
+  // MCMachOStreamer::MakeLineEntryForSection() and used to emit the line
+  // tables.
+  DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+    getContext().getMCLineSections();
+  for (DenseMap<const MCSection *, MCLineSection *>::iterator it =
+       MCLineSections.begin(), ie = MCLineSections.end(); it != ie; ++it) {
+    delete it->second;
+  }
+
+  // If no line tables were emitted, emit the following DW_LNE_set_address
+  // sequence to set the address to zero.
+  // TODO: test for 32-bit or 64-bit output.
+  // This is the sequence for 32-bit code.
+  EmitIntValue(0, 1);
+  EmitIntValue(5, 1);
+  EmitIntValue(2, 1);
+  EmitIntValue(0, 1);
+  EmitIntValue(0, 1);
+  EmitIntValue(0, 1);
+  EmitIntValue(0, 1);
+
+  // Lastly, emit the DW_LNE_end_sequence, which consists of the 3 bytes
+  // '00 01 01': 00 is the code for extended opcodes, followed by a ULEB128
+  // length of the extended opcode (01), and the DW_LNE_end_sequence opcode
+  // itself (01).
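+  // (For reference, every extended opcode shares this framing: a 0x00 escape
+  // byte, a ULEB128 payload length, then the sub-opcode and its operands.
+  // The DW_LNE_set_address sequence above is 00 05 02 plus four zero address
+  // bytes; the end-of-sequence marker below is the 00 01 01 just described.)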
+ EmitIntValue(0, 1); // DW_LNS_extended_op + EmitIntValue(1, 1); // ULEB128 length of the extended opcode + EmitIntValue(1, 1); // DW_LNE_end_sequence + + // This is the end of the section, so set the value of the symbol at the end + // of this section (that was used in a previous expression). + EmitLabel(LineEndSym); +} + void MCMachOStreamer::Finish() { + // Dump out the dwarf file and directory tables (soon to include line table) + EmitDwarfFileTable(); + // We have to set the fragment atom associations so we can relax properly for // Mach-O. diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 5332ade211535..f7a2f20ca4bc3 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -26,6 +26,7 @@ namespace { /// @{ virtual void SwitchSection(const MCSection *Section) { + PrevSection = CurSection; CurSection = Section; } diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index d3f7f7783ffa7..2b2385ef9156d 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -9,7 +9,11 @@ #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetAsmBackend.h" using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, @@ -21,15 +25,59 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, } MCObjectStreamer::~MCObjectStreamer() { + delete &Assembler->getBackend(); + delete &Assembler->getEmitter(); delete Assembler; } +MCFragment *MCObjectStreamer::getCurrentFragment() const { + assert(getCurrentSectionData() && "No current section!"); + + if (!getCurrentSectionData()->empty()) + return &getCurrentSectionData()->getFragmentList().back(); + + return 0; +} + +MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { + MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!F) + F = new MCDataFragment(getCurrentSectionData()); + return F; +} + +const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { + switch (Value->getKind()) { + case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!"); + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbols(BE->getLHS()); + AddValueSymbols(BE->getRHS()); + break; + } + + case MCExpr::SymbolRef: + Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr()); + break; + } + + return Value; +} + void MCObjectStreamer::SwitchSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); // If already in this section, then this is a noop. 
if (Section == CurSection) return; + PrevSection = CurSection; CurSection = Section; CurSectionData = &getAssembler().getOrCreateSectionData(*Section); } diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 465d98382877b..086df081a938f 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -117,6 +117,13 @@ AsmToken AsmLexer::LexLineComment() { return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); } +static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { + if (CurPtr[0] == 'L' && CurPtr[1] == 'L') + CurPtr += 2; + if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L') + CurPtr += 3; +} + /// LexDigit: First character is [0-9]. /// Local Label: [0-9][:] @@ -133,7 +140,7 @@ AsmToken AsmLexer::LexDigit() { ++CurPtr; StringRef Result(TokStart, CurPtr - TokStart); - + long long Value; if (Result.getAsInteger(10, Value)) { // We have to handle minint_as_a_positive_value specially, because @@ -143,6 +150,11 @@ AsmToken AsmLexer::LexDigit() { else return ReturnError(TokStart, "Invalid decimal number"); } + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + return AsmToken(AsmToken::Integer, Result, Value); } @@ -165,9 +177,13 @@ AsmToken AsmLexer::LexDigit() { StringRef Result(TokStart, CurPtr - TokStart); long long Value; - if (Result.getAsInteger(2, Value)) + if (Result.substr(2).getAsInteger(2, Value)) return ReturnError(TokStart, "Invalid binary number"); + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + return AsmToken(AsmToken::Integer, Result, Value); } @@ -185,6 +201,10 @@ AsmToken AsmLexer::LexDigit() { if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) return ReturnError(TokStart, "Invalid hexadecimal number"); + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), (int64_t)Result); } @@ -198,6 +218,10 @@ AsmToken AsmLexer::LexDigit() { if (Result.getAsInteger(8, Value)) return ReturnError(TokStart, "Invalid octal number"); + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. 
+  SkipIgnoredIntegerSuffix(CurPtr);
+
   return AsmToken(AsmToken::Integer, Result, Value);
 }
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index e0949bd2856f7..f83cd5eb2a160 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -11,47 +11,237 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCParser/AsmParser.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmParser.h"
+#include <vector>
 using namespace llvm;
 
 namespace {
 
+/// \brief Helper class for tracking macro definitions.
+struct Macro {
+  StringRef Name;
+  StringRef Body;
+
+public:
+  Macro(StringRef N, StringRef B) : Name(N), Body(B) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+  /// The macro being instantiated.
+  const Macro *TheMacro;
+
+  /// The macro instantiation with substitutions.
+  MemoryBuffer *Instantiation;
+
+  /// The location of the instantiation.
+  SMLoc InstantiationLoc;
+
+  /// The location where parsing should resume upon instantiation completion.
+  SMLoc ExitLoc;
+
+public:
+  MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+                     const std::vector<std::vector<AsmToken> > &A);
+};
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+  friend class GenericAsmParser;
+
+  AsmParser(const AsmParser &);   // DO NOT IMPLEMENT
+  void operator=(const AsmParser &);  // DO NOT IMPLEMENT
+private:
+  AsmLexer Lexer;
+  MCContext &Ctx;
+  MCStreamer &Out;
+  SourceMgr &SrcMgr;
+  MCAsmParserExtension *GenericParser;
+  MCAsmParserExtension *PlatformParser;
+
+  /// This is the current buffer index we're lexing from as managed by the
+  /// SourceMgr object.
+  int CurBuffer;
+
+  AsmCond TheCondState;
+  std::vector<AsmCond> TheCondStack;
+
+  /// DirectiveMap - This is a table of handlers for directives. Each handler
+  /// is invoked after the directive identifier is read and is responsible for
+  /// parsing and validating the rest of the directive. The handler is passed
+  /// in the directive name and the location of the directive keyword.
+  StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+
+  /// MacroMap - Map of currently defined macros.
+  StringMap<Macro*> MacroMap;
+
+  /// ActiveMacros - Stack of active macro instantiations.
+  std::vector<MacroInstantiation*> ActiveMacros;
+
+  /// Boolean tracking whether macro substitution is enabled.
+  unsigned MacrosEnabled : 1;
+
+public:
+  AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+            const MCAsmInfo &MAI);
+  ~AsmParser();
+
+  virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+  void AddDirectiveHandler(MCAsmParserExtension *Object,
+                           StringRef Directive,
+                           DirectiveHandler Handler) {
+    DirectiveMap[Directive] = std::make_pair(Object, Handler);
+  }
+
+public:
+  /// @name MCAsmParser Interface
+  /// {
+
+  virtual SourceMgr &getSourceManager() { return SrcMgr; }
+  virtual MCAsmLexer &getLexer() { return Lexer; }
+  virtual MCContext &getContext() { return Ctx; }
+  virtual MCStreamer &getStreamer() { return Out; }
+
+  virtual void Warning(SMLoc L, const Twine &Msg);
+  virtual bool Error(SMLoc L, const Twine &Msg);
+
+  const AsmToken &Lex();
+
+  bool ParseExpression(const MCExpr *&Res);
+  virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+  virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+  virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+  /// }
+
+private:
+  bool ParseStatement();
+
+  bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+  void HandleMacroExit();
+
+  void PrintMacroInstantiations();
+  void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
+
+  /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+  bool EnterIncludeFile(const std::string &Filename);
+
+  /// \brief Reset the current lexer position to that given by \arg Loc. The
+  /// current token is not set; clients should ensure Lex() is called
+  /// subsequently.
+  void JumpToLoc(SMLoc Loc);
+
+  void EatToEndOfStatement();
+
+  /// \brief Parse up to the end of statement and return the contents from the
+  /// current token until the end of the statement; the current token on exit
+  /// will be either the EndOfStatement or EOF.
+  StringRef ParseStringToEndOfStatement();
+
+  bool ParseAssignment(StringRef Name);
+
+  bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+  bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+  bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+  /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+  /// and set \arg Res to the identifier contents.
+  bool ParseIdentifier(StringRef &Res);
+
+  // Directive Parsing.
+  bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
+  bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+  bool ParseDirectiveFill(); // ".fill"
+  bool ParseDirectiveSpace(); // ".space"
+  bool ParseDirectiveSet(); // ".set"
+  bool ParseDirectiveOrg(); // ".org"
+  // ".align{,32}", ".p2align{,w,l}"
+  bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+  /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+  /// accepts a single symbol (which should be a label or an external).
+  bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+  bool ParseDirectiveELFType(); // ELF specific ".type"
+
+  bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+  bool ParseDirectiveAbort(); // ".abort"
+  bool ParseDirectiveInclude(); // ".include"
+
+  bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+  bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+  bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+  bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // ".endif"
+
+  /// ParseEscapedString - Parse the current token as a string which may include
+  /// escaped characters and return the string contents.
+  bool ParseEscapedString(std::string &Data);
+};
+
 /// \brief Generic implementations of directive handling, etc. which is shared
 /// (or the default, at least) for all assembler parsers.
 class GenericAsmParser : public MCAsmParserExtension {
+  template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
+  void AddDirectiveHandler(StringRef Directive) {
+    getParser().AddDirectiveHandler(this, Directive,
+                                    HandleDirective<GenericAsmParser, Handler>);
+  }
+
 public:
   GenericAsmParser() {}
 
+  AsmParser &getParser() {
+    return (AsmParser&) this->MCAsmParserExtension::getParser();
+  }
+
   virtual void Initialize(MCAsmParser &Parser) {
     // Call the base implementation.
     this->MCAsmParserExtension::Initialize(Parser);
 
     // Debugging directives.
-    Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler(
-                                 &GenericAsmParser::ParseDirectiveFile));
-    Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler(
-                                 &GenericAsmParser::ParseDirectiveLine));
-    Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler(
-                                 &GenericAsmParser::ParseDirectiveLoc));
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+
+    // Macro directives.
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+      ".macros_on");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+      ".macros_off");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
   }
 
-  bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
-  bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
-  bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
+  bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+
+  bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
 };
 
 }
@@ -69,7 +259,7 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
                      MCStreamer &_Out, const MCAsmInfo &_MAI)
   : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
     GenericParser(new GenericAsmParser), PlatformParser(0),
-    TargetParser(0), CurBuffer(0) {
+    CurBuffer(0), MacrosEnabled(true) {
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
 
   // Initialize the generic parser.
@@ -89,22 +279,33 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx, } AsmParser::~AsmParser() { + assert(ActiveMacros.empty() && "Unexpected active macro instantiation!"); + + // Destroy any macros. + for (StringMap<Macro*>::iterator it = MacroMap.begin(), + ie = MacroMap.end(); it != ie; ++it) + delete it->getValue(); + delete PlatformParser; delete GenericParser; } -void AsmParser::setTargetParser(TargetAsmParser &P) { - assert(!TargetParser && "Target parser is already initialized!"); - TargetParser = &P; - TargetParser->Initialize(*this); +void AsmParser::PrintMacroInstantiations() { + // Print the active macro instantiation stack. + for (std::vector<MacroInstantiation*>::const_reverse_iterator + it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it) + PrintMessage((*it)->InstantiationLoc, "while in macro instantiation", + "note"); } void AsmParser::Warning(SMLoc L, const Twine &Msg) { PrintMessage(L, Msg.str(), "warning"); + PrintMacroInstantiations(); } bool AsmParser::Error(SMLoc L, const Twine &Msg) { PrintMessage(L, Msg.str(), "error"); + PrintMacroInstantiations(); return true; } @@ -124,7 +325,12 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { return false; } - + +void AsmParser::JumpToLoc(SMLoc Loc) { + CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer()); +} + const AsmToken &AsmParser::Lex() { const AsmToken *tok = &Lexer.Lex(); @@ -133,15 +339,13 @@ const AsmToken &AsmParser::Lex() { // include stack. SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { - CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), - ParentIncludeLoc.getPointer()); + JumpToLoc(ParentIncludeLoc); tok = &Lexer.Lex(); } } if (tok->is(AsmToken::Error)) - PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error"); + Error(Lexer.getErrLoc(), Lexer.getErr()); return *tok; } @@ -174,6 +378,16 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { if (TheCondState.TheCond != StartingCondState.TheCond || TheCondState.Ignore != StartingCondState.Ignore) return TokError("unmatched .ifs or .elses"); + + // Check to see there are no empty DwarfFile slots. + const std::vector<MCDwarfFile *> &MCDwarfFiles = + getContext().getMCDwarfFiles(); + for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { + if (!MCDwarfFiles[i]){ + TokError("unassigned file number: " + Twine(i) + " for .file directives"); + HadError = true; + } + } // Finalize the output stream if there are no errors and if the client wants // us to. @@ -194,6 +408,16 @@ void AsmParser::EatToEndOfStatement() { Lex(); } +StringRef AsmParser::ParseStringToEndOfStatement() { + const char *Start = getTok().getLoc().getPointer(); + + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Eof)) + Lex(); + + const char *End = getTok().getLoc().getPointer(); + return StringRef(Start, End - Start); +} /// ParseParenExpr - Parse a paren expression and return it. /// NOTE: This assumes the leading '(' has already been consumed. @@ -225,10 +449,17 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return true; Res = MCUnaryExpr::CreateLNot(Res, getContext()); return false; + case AsmToken::Dollar: case AsmToken::String: case AsmToken::Identifier: { + EndLoc = Lexer.getLoc(); + + StringRef Identifier; + if (ParseIdentifier(Identifier)) + return false; + // This is a symbol reference. 
- std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@'); + std::pair<StringRef, StringRef> Split = Identifier.split('@'); MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); // Mark the symbol as used in an expression. @@ -236,12 +467,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Lookup the symbol variant if used. MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - if (Split.first.size() != getTok().getIdentifier().size()) + if (Split.first.size() != Identifier.size()) Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); - EndLoc = Lexer.getLoc(); - Lex(); // Eat identifier. - // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) { @@ -568,7 +796,12 @@ bool AsmParser::ParseStatement() { default: // Normal instruction or directive. break; } - + + // If macros are enabled, check to see if this is a macro instantiation. + if (MacrosEnabled) + if (const Macro *M = MacroMap.lookup(IDVal)) + return HandleMacroEntry(IDVal, IDLoc, M); + // Otherwise, we have a normal instruction or directive. if (IDVal[0] == '.') { // Assembler features @@ -591,11 +824,14 @@ bool AsmParser::ParseStatement() { if (IDVal == ".quad") return ParseDirectiveValue(8); - // FIXME: Target hooks for IsPow2. - if (IDVal == ".align") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - if (IDVal == ".align32") - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + if (IDVal == ".align") { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); + } + if (IDVal == ".align32") { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); + } if (IDVal == ".balign") return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); if (IDVal == ".balignw") @@ -662,7 +898,7 @@ bool AsmParser::ParseStatement() { std::pair<MCAsmParserExtension*, DirectiveHandler> Handler = DirectiveMap.lookup(IDVal); if (Handler.first) - return (Handler.first->*Handler.second)(IDVal, IDLoc); + return (*Handler.second)(Handler.first, IDVal, IDLoc); // Target hook for parsing target specific directives. if (!getTargetParser().ParseDirective(ID)) @@ -684,20 +920,29 @@ bool AsmParser::ParseStatement() { if (!HadError && Lexer.isNot(AsmToken::EndOfStatement)) HadError = TokError("unexpected token in argument list"); + // Dump the parsed representation, if requested. + if (getShowParsedOperands()) { + SmallString<256> Str; + raw_svector_ostream OS(Str); + OS << "parsed instruction: ["; + for (unsigned i = 0; i != ParsedOperands.size(); ++i) { + if (i != 0) + OS << ", "; + ParsedOperands[i]->dump(OS); + } + OS << "]"; + + PrintMessage(IDLoc, OS.str(), "note"); + } + // If parsing succeeded, match the instruction. if (!HadError) { MCInst Inst; - if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) { + if (!getTargetParser().MatchInstruction(IDLoc, ParsedOperands, Inst)) { // Emit the instruction on success. Out.EmitInstruction(Inst); - } else { - // Otherwise emit a diagnostic about the match failure and set the error - // flag. - // - // FIXME: We should give nicer diagnostics about the exact failure. - Error(IDLoc, "unrecognized instruction"); + } else HadError = true; - } } // If there was no error, consume the end-of-statement token. 
Otherwise this
@@ -712,6 +957,132 @@ bool AsmParser::ParseStatement() {
   return HadError;
 }

+MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+                                       const std::vector<std::vector<AsmToken> > &A)
+  : TheMacro(M), InstantiationLoc(IL), ExitLoc(EL)
+{
+  // Macro instantiation is lexical, unfortunately. We construct a new buffer
+  // to hold the macro body with substitutions.
+  SmallString<256> Buf;
+  raw_svector_ostream OS(Buf);
+
+  StringRef Body = M->Body;
+  while (!Body.empty()) {
+    // Scan for the next substitution.
+    std::size_t End = Body.size(), Pos = 0;
+    for (; Pos != End; ++Pos) {
+      // Check for a substitution or escape.
+      if (Body[Pos] != '$' || Pos + 1 == End)
+        continue;
+
+      char Next = Body[Pos + 1];
+      if (Next == '$' || Next == 'n' || isdigit(Next))
+        break;
+    }
+
+    // Add the prefix.
+    OS << Body.slice(0, Pos);
+
+    // Check if we reached the end.
+    if (Pos == End)
+      break;
+
+    switch (Body[Pos+1]) {
+    // $$ => $
+    case '$':
+      OS << '$';
+      break;
+
+    // $n => number of arguments
+    case 'n':
+      OS << A.size();
+      break;
+
+    // $[0-9] => argument
+    default: {
+      // Missing arguments are ignored.
+      unsigned Index = Body[Pos+1] - '0';
+      if (Index >= A.size())
+        break;
+
+      // Otherwise substitute with the token values, with spaces eliminated.
+      for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+             ie = A[Index].end(); it != ie; ++it)
+        OS << it->getString();
+      break;
+    }
+    }
+
+    // Update the scan point.
+    Body = Body.substr(Pos + 2);
+  }
+
+  // We include the .endmacro in the buffer as our cue to exit the macro
+  // instantiation.
+  OS << ".endmacro\n";
+
+  Instantiation = MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+}
+
+bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
+                                 const Macro *M) {
+  // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
+  // this, although we should protect against infinite loops.
+  if (ActiveMacros.size() == 20)
+    return TokError("macros cannot be nested more than 20 levels deep");
+
+  // Parse the macro instantiation arguments.
+  std::vector<std::vector<AsmToken> > MacroArguments;
+  MacroArguments.push_back(std::vector<AsmToken>());
+  unsigned ParenLevel = 0;
+  for (;;) {
+    if (Lexer.is(AsmToken::Eof))
+      return TokError("unexpected token in macro instantiation");
+    if (Lexer.is(AsmToken::EndOfStatement))
+      break;
+
+    // If we aren't inside parentheses and this is a comma, start a new token
+    // list.
+    if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+      MacroArguments.push_back(std::vector<AsmToken>());
+    } else {
+      // Adjust the current parentheses level.
+      if (Lexer.is(AsmToken::LParen))
+        ++ParenLevel;
+      else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+        --ParenLevel;
+
+      // Append the token to the current argument list.
+      MacroArguments.back().push_back(getTok());
+    }
+    Lex();
+  }
+
+  // Create the macro instantiation object and add to the current macro
+  // instantiation stack.
+  MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+                                                  getTok().getLoc(),
+                                                  MacroArguments);
+  ActiveMacros.push_back(MI);
+
+  // Jump to the macro instantiation and prime the lexer.
+  CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+  Lex();
+
+  return false;
+}
+
+void AsmParser::HandleMacroExit() {
+  // Jump to the EndOfStatement we should return to, and consume it.
+  JumpToLoc(ActiveMacros.back()->ExitLoc);
+  Lex();
+
+  // Pop the instantiation entry.
+  delete ActiveMacros.back();
+  ActiveMacros.pop_back();
+}
+
 bool AsmParser::ParseAssignment(StringRef Name) {
   // FIXME: Use better location, we should use proper tokens.
   SMLoc EqualLoc = Lexer.getLoc();

@@ -760,6 +1131,30 @@ bool AsmParser::ParseAssignment(StringRef Name) {
 ///  ::= identifier
 ///  ::= string
 bool AsmParser::ParseIdentifier(StringRef &Res) {
+  // The assembler has relaxed rules for accepting identifiers; in particular,
+  // we allow things like '.globl $foo', which would normally be separate
+  // tokens. At this level, we have already lexed, so we cannot (currently)
+  // handle this as a context-dependent token; instead we detect adjacent
+  // tokens and return the combined identifier.
+  if (Lexer.is(AsmToken::Dollar)) {
+    SMLoc DollarLoc = getLexer().getLoc();
+
+    // Consume the dollar sign, and check for a following identifier.
+    Lex();
+    if (Lexer.isNot(AsmToken::Identifier))
+      return true;
+
+    // We have a '$' followed by an identifier; make sure they are adjacent.
+    if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+      return true;
+
+    // Construct the joined identifier and consume the token.
+    Res = StringRef(DollarLoc.getPointer(),
+                    getTok().getIdentifier().size() + 1);
+    Lex();
+    return false;
+  }
+
   if (Lexer.isNot(AsmToken::Identifier) &&
       Lexer.isNot(AsmToken::String))
     return true;

@@ -1081,13 +1476,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
   bool UseCodeAlign = false;
   if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
         getStreamer().getCurrentSection()))
-    UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+    UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
   if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
       ValueSize == 1 && UseCodeAlign) {
     getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
   } else {
     // FIXME: Target specific behavior about how the "extra" bytes are filled.
-    getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+    getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
+                                       MaxBytesToFill);
   }

   return false;

@@ -1238,31 +1634,22 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
 }

 /// ParseDirectiveAbort
-///  ::= .abort [ "abort_string" ]
+///  ::= .abort [... message ...]
 bool AsmParser::ParseDirectiveAbort() {
   // FIXME: Use loc from directive.
   SMLoc Loc = getLexer().getLoc();

-  StringRef Str = "";
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    if (getLexer().isNot(AsmToken::String))
-      return TokError("expected string in '.abort' directive");
-
-    Str = getTok().getString();
-
-    Lex();
-  }
-
+  StringRef Str = ParseStringToEndOfStatement();
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in '.abort' directive");
-
+
   Lex();

-  // FIXME: Handle here.
   if (Str.empty())
     Error(Loc, ".abort detected. Assembly stopping.");
   else
     Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+  // FIXME: Actually abort assembly here.

   return false;
 }

@@ -1286,9 +1673,7 @@ bool AsmParser::ParseDirectiveInclude() {

   // Attempt to switch the lexer to the included file before consuming the end
   // of statement to avoid losing it when we switch.
   if (EnterIncludeFile(Filename)) {
-    PrintMessage(IncludeLoc,
-                 "Could not find include file '" + Filename + "'",
-                 "error");
+    Error(IncludeLoc, "Could not find include file '" + Filename + "'");
     return true;
   }

@@ -1401,6 +1786,7 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
 bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
   // FIXME: I'm not sure what this is.
   int64_t FileNumber = -1;
+  SMLoc FileNumberLoc = getLexer().getLoc();
   if (getLexer().is(AsmToken::Integer)) {
     FileNumber = getTok().getIntVal();
     Lex();

@@ -1421,8 +1807,11 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {

   if (FileNumber == -1)
     getStreamer().EmitFileDirective(Filename);
-  else
+  else {
+    if (getContext().GetDwarfFile(Filename, FileNumber) == 0)
+      Error(FileNumberLoc, "file number already allocated");
     getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
+  }

   return false;
 }

@@ -1449,40 +1838,193 @@ bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {

 /// ParseDirectiveLoc
-///  ::= .loc number [number [number]]
+///  ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
+///      [epilogue_begin] [is_stmt VALUE] [isa VALUE]
+/// The first number is a file number, which must have been previously assigned
+/// with a .file directive; the second number is the line number; and
+/// optionally the third number is a column position (zero if not specified).
+/// The remaining optional items are .loc sub-directives.
 bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
+
   if (getLexer().isNot(AsmToken::Integer))
     return TokError("unexpected token in '.loc' directive");
-
-  // FIXME: What are these fields?
   int64_t FileNumber = getTok().getIntVal();
-  (void) FileNumber;
-  // FIXME: Validate file.
-
+  if (FileNumber < 1)
+    return TokError("file number less than one in '.loc' directive");
+  if (!getContext().ValidateDwarfFileNumber(FileNumber))
+    return TokError("unassigned file number in '.loc' directive");
   Lex();

-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    if (getLexer().isNot(AsmToken::Integer))
-      return TokError("unexpected token in '.loc' directive");
-    int64_t Param2 = getTok().getIntVal();
-    (void) Param2;
+  int64_t LineNumber = 0;
+  if (getLexer().is(AsmToken::Integer)) {
+    LineNumber = getTok().getIntVal();
+    if (LineNumber < 1)
+      return TokError("line number less than one in '.loc' directive");
     Lex();
+  }

-    if (getLexer().isNot(AsmToken::EndOfStatement)) {
-      if (getLexer().isNot(AsmToken::Integer))
+  int64_t ColumnPos = 0;
+  if (getLexer().is(AsmToken::Integer)) {
+    ColumnPos = getTok().getIntVal();
+    if (ColumnPos < 0)
+      return TokError("column position less than zero in '.loc' directive");
+    Lex();
+  }
+
+  unsigned Flags = 0;
+  unsigned Isa = 0;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      StringRef Name;
+      SMLoc Loc = getTok().getLoc();
+      if (getParser().ParseIdentifier(Name))
         return TokError("unexpected token in '.loc' directive");
-      int64_t Param3 = getTok().getIntVal();
-      (void) Param3;
-      Lex();

+      if (Name == "basic_block")
+        Flags |= DWARF2_FLAG_BASIC_BLOCK;
+      else if (Name == "prologue_end")
+        Flags |= DWARF2_FLAG_PROLOGUE_END;
+      else if (Name == "epilogue_begin")
+        Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+      else if (Name == "is_stmt") {
+        SMLoc Loc = getTok().getLoc();
+        const MCExpr *Value;
+        if (getParser().ParseExpression(Value))
+          return true;
+        // The expression must be the constant 0 or 1.
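+        // (Illustrative, not from this patch: ".loc 1 7 2 is_stmt 1" reaches
+        // this point with Value folding to the constant 1, which sets
+        // DWARF2_FLAG_IS_STMT.)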
+        if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+          int Value = MCE->getValue();
+          if (Value == 0)
+            Flags &= ~DWARF2_FLAG_IS_STMT;
+          else if (Value == 1)
+            Flags |= DWARF2_FLAG_IS_STMT;
+          else
+            return Error(Loc, "is_stmt value not 0 or 1");
+        }
+        else {
+          return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+        }
+      }
+      else if (Name == "isa") {
+        SMLoc Loc = getTok().getLoc();
+        const MCExpr *Value;
+        if (getParser().ParseExpression(Value))
+          return true;
+        // The expression must be a constant greater than or equal to 0.
+        if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+          int Value = MCE->getValue();
+          if (Value < 0)
+            return Error(Loc, "isa number less than zero");
+          Isa = Value;
+        }
+        else {
+          return Error(Loc, "isa number not a constant value");
+        }
+      }
+      else {
+        return Error(Loc, "unknown sub-directive in '.loc' directive");
+      }

-      // FIXME: Do something with the .loc.
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
     }
   }

+  getContext().setCurrentDwarfLoc(FileNumber, LineNumber, ColumnPos, Flags,Isa);
+
+  return false;
+}
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
+                                                 SMLoc DirectiveLoc) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in '.file' directive");
+    return Error(getLexer().getLoc(),
+                 "unexpected token in '" + Directive + "' directive");
+
+  getParser().MacrosEnabled = Directive == ".macros_on";

   return false;
 }

+/// ParseDirectiveMacro
+/// ::= .macro name
+bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
+                                           SMLoc DirectiveLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.macro' directive");
+
+  // Eat the end of statement.
+  Lex();
+
+  AsmToken EndToken, StartToken = getTok();
+
+  // Lex the macro definition.
+  for (;;) {
+    // Check whether we have reached the end of the file.
+    if (getLexer().is(AsmToken::Eof))
+      return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+    // Otherwise, check whether we have reached the .endmacro.
+    if (getLexer().is(AsmToken::Identifier) &&
+        (getTok().getIdentifier() == ".endm" ||
+         getTok().getIdentifier() == ".endmacro")) {
+      EndToken = getTok();
+      Lex();
+      if (getLexer().isNot(AsmToken::EndOfStatement))
+        return TokError("unexpected token in '" + EndToken.getIdentifier() +
+                        "' directive");
+      break;
+    }
+
+    // Otherwise, scan to the end of the statement.
+    getParser().EatToEndOfStatement();
+  }
+
+  if (getParser().MacroMap.lookup(Name)) {
+    return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+  }
+
+  const char *BodyStart = StartToken.getLoc().getPointer();
+  const char *BodyEnd = EndToken.getLoc().getPointer();
+  StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+  getParser().MacroMap[Name] = new Macro(Name, Body);
+  return false;
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
+                                              SMLoc DirectiveLoc) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '" + Directive + "' directive");
+
+  // If we are inside a macro instantiation, terminate the current
+  // instantiation.
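+  // (Illustrative: after ".macro double" with body "addl $0, $0" and ".endm",
+  // the statement "double %eax" expands to "addl %eax, %eax", and the
+  // ".endmacro" appended to the instantiation buffer funnels control here.)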
+ if (!getParser().ActiveMacros.empty()) { + getParser().HandleMacroExit(); + return false; + } + + // Otherwise, this .endmacro is a stray entry in the file; well formed + // .endmacro directives are handled during the macro definition parsing. + return TokError("unexpected '" + Directive + "' in file, " + "no current macro definition"); +} + +/// \brief Create an MCAsmParser instance. +MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM, + MCContext &C, MCStreamer &Out, + const MCAsmInfo &MAI) { + return new AsmParser(T, SM, C, Out, MAI); +} diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 7d8639ea4d81f..54ddb449b2859 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -25,6 +25,12 @@ namespace { /// \brief Implementation of directive handling which is shared across all /// Darwin targets. class DarwinAsmParser : public MCAsmParserExtension { + template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)> + void AddDirectiveHandler(StringRef Directive) { + getParser().AddDirectiveHandler(this, Directive, + HandleDirective<DarwinAsmParser, Handler>); + } + bool ParseSectionSwitch(const char *Segment, const char *Section, unsigned TAA = 0, unsigned ImplicitAlign = 0, unsigned StubSize = 0); @@ -36,168 +42,70 @@ public: // Call the base implementation. this->MCAsmParserExtension::Initialize(Parser); - Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveDesc)); - Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveLsym)); - Parser.AddDirectiveHandler(this, ".subsections_via_symbols", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols)); - Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveDumpOrLoad)); - Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveDumpOrLoad)); - Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveSection)); - Parser.AddDirectiveHandler(this, ".secure_log_unique", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveSecureLogUnique)); - Parser.AddDirectiveHandler(this, ".secure_log_reset", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveSecureLogReset)); - Parser.AddDirectiveHandler(this, ".tbss", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveTBSS)); - Parser.AddDirectiveHandler(this, ".zerofill", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseDirectiveZerofill)); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>( + ".subsections_via_symbols"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( + ".secure_log_unique"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( + ".secure_log_reset"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill"); // Special section 
directives. - Parser.AddDirectiveHandler(this, ".const", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveConst)); - Parser.AddDirectiveHandler(this, ".const_data", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveConstData)); - Parser.AddDirectiveHandler(this, ".constructor", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveConstructor)); - Parser.AddDirectiveHandler(this, ".cstring", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveCString)); - Parser.AddDirectiveHandler(this, ".data", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveData)); - Parser.AddDirectiveHandler(this, ".destructor", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveDestructor)); - Parser.AddDirectiveHandler(this, ".dyld", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveDyld)); - Parser.AddDirectiveHandler(this, ".fvmlib_init0", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0)); - Parser.AddDirectiveHandler(this, ".fvmlib_init1", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1)); - Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers)); - Parser.AddDirectiveHandler(this, ".literal16", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveLiteral16)); - Parser.AddDirectiveHandler(this, ".literal4", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveLiteral4)); - Parser.AddDirectiveHandler(this, ".literal8", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveLiteral8)); - Parser.AddDirectiveHandler(this, ".mod_init_func", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveModInitFunc)); - Parser.AddDirectiveHandler(this, ".mod_term_func", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveModTermFunc)); - Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers)); - Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth)); - Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth)); - Parser.AddDirectiveHandler(this, ".objc_category", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCCategory)); - Parser.AddDirectiveHandler(this, ".objc_class", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCClass)); - Parser.AddDirectiveHandler(this, ".objc_class_names", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCClassNames)); - Parser.AddDirectiveHandler(this, ".objc_class_vars", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCClassVars)); - Parser.AddDirectiveHandler(this, ".objc_cls_meth", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth)); - Parser.AddDirectiveHandler(this, ".objc_cls_refs", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs)); - Parser.AddDirectiveHandler(this, ".objc_inst_meth", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth)); - 
Parser.AddDirectiveHandler(this, ".objc_instance_vars", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars)); - Parser.AddDirectiveHandler(this, ".objc_message_refs", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs)); - Parser.AddDirectiveHandler(this, ".objc_meta_class", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass)); - Parser.AddDirectiveHandler(this, ".objc_meth_var_names", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames)); - Parser.AddDirectiveHandler(this, ".objc_meth_var_types", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes)); - Parser.AddDirectiveHandler(this, ".objc_module_info", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo)); - Parser.AddDirectiveHandler(this, ".objc_protocol", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCProtocol)); - Parser.AddDirectiveHandler(this, ".objc_selector_strs", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs)); - Parser.AddDirectiveHandler(this, ".objc_string_object", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCStringObject)); - Parser.AddDirectiveHandler(this, ".objc_symbols", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveObjCSymbols)); - Parser.AddDirectiveHandler(this, ".picsymbol_stub", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectivePICSymbolStub)); - Parser.AddDirectiveHandler(this, ".static_const", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveStaticConst)); - Parser.AddDirectiveHandler(this, ".static_data", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveStaticData)); - Parser.AddDirectiveHandler(this, ".symbol_stub", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveSymbolStub)); - Parser.AddDirectiveHandler(this, ".tdata", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveTData)); - Parser.AddDirectiveHandler(this, ".text", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveText)); - Parser.AddDirectiveHandler(this, ".thread_init_func", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc)); - Parser.AddDirectiveHandler(this, ".tlv", - MCAsmParser::DirectiveHandler( - &DarwinAsmParser::ParseSectionDirectiveTLV)); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16"); + 
AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); } bool ParseDirectiveDesc(StringRef, SMLoc); bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); bool ParseDirectiveLsym(StringRef, SMLoc); - bool ParseDirectiveSection(); + bool ParseDirectiveSection(StringRef, SMLoc); bool ParseDirectiveSecureLogReset(StringRef, SMLoc); bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); @@ -493,7 +401,7 @@ bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { /// ParseDirectiveSection: /// ::= .section identifier (',' 
identifier)* -bool DarwinAsmParser::ParseDirectiveSection() { +bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { SMLoc Loc = getLexer().getLoc(); StringRef SectionName; @@ -537,28 +445,22 @@ bool DarwinAsmParser::ParseDirectiveSection() { } /// ParseDirectiveSecureLogUnique -/// ::= .secure_log_unique "log message" +/// ::= .secure_log_unique ... message ... bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { - std::string LogMessage; - - if (getLexer().isNot(AsmToken::String)) - LogMessage = ""; - else{ - LogMessage = getTok().getString(); - Lex(); - } - + StringRef LogMessage = getParser().ParseStringToEndOfStatement(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.secure_log_unique' directive"); if (getContext().getSecureLogUsed() != false) return Error(IDLoc, ".secure_log_unique specified multiple times"); - char *SecureLogFile = getContext().getSecureLogFile(); + // Get the secure log path. + const char *SecureLogFile = getContext().getSecureLogFile(); if (SecureLogFile == NULL) return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE " "environment variable unset."); + // Open the secure log file if we haven't already. raw_ostream *OS = getContext().getSecureLog(); if (OS == NULL) { std::string Err; @@ -571,6 +473,7 @@ bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { getContext().setSecureLog(OS); } + // Write the message. int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier() << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":" diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 7a54dd39aa479..f982fdaecb123 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -8,15 +8,24 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParserExtension.h" -#include "llvm/MC/MCSectionELF.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/Twine.h" using namespace llvm; namespace { class ELFAsmParser : public MCAsmParserExtension { + template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)> + void AddDirectiveHandler(StringRef Directive) { + getParser().AddDirectiveHandler(this, Directive, + HandleDirective<ELFAsmParser, Handler>); + } + bool ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind); @@ -27,10 +36,21 @@ public: // Call the base implementation. 
this->MCAsmParserExtension::Initialize(Parser); - Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler( - &ELFAsmParser::ParseSectionDirectiveData)); - Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler( - &ELFAsmParser::ParseSectionDirectiveText)); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); + AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".sleb128"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".uleb128"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous"); } bool ParseSectionDirectiveData(StringRef, SMLoc) { @@ -43,6 +63,56 @@ public: MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, SectionKind::getText()); } + bool ParseSectionDirectiveBSS(StringRef, SMLoc) { + return ParseSectionSwitch(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, SectionKind::getBSS()); + } + bool ParseSectionDirectiveRoData(StringRef, SMLoc) { + return ParseSectionSwitch(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE, + SectionKind::getThreadData()); + } + bool ParseSectionDirectiveTBSS(StringRef, SMLoc) { + return ParseSectionSwitch(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE, + SectionKind::getThreadBSS()); + } + bool ParseSectionDirectiveDataRel(StringRef, SMLoc) { + return ParseSectionSwitch(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + } + bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) { + return ParseSectionSwitch(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + } + bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) { + return ParseSectionSwitch(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + } + bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) { + return ParseSectionSwitch(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + } + bool ParseDirectiveLEB128(StringRef, SMLoc); + bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectiveSize(StringRef, SMLoc); + bool ParseDirectivePrevious(StringRef, SMLoc); }; } @@ -59,6 
+129,159 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
   return false;
 }

+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  const MCExpr *Expr;
+  if (getParser().ParseExpression(Expr))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  getStreamer().EmitELFSize(Sym, Expr);
+  return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+  StringRef SectionName;
+  // FIXME: This doesn't parse section names like ".note.GNU-stack" correctly.
+  if (getParser().ParseIdentifier(SectionName))
+    return TokError("expected identifier in directive");
+
+  std::string FlagsStr;
+  StringRef TypeName;
+  int64_t Size = 0;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+
+    if (getLexer().isNot(AsmToken::String))
+      return TokError("expected string in directive");
+
+    FlagsStr = getTok().getStringContents();
+    Lex();
+
+    AsmToken::TokenKind TypeStartToken;
+    if (getContext().getAsmInfo().getCommentString()[0] == '@')
+      TypeStartToken = AsmToken::Percent;
+    else
+      TypeStartToken = AsmToken::At;
+
+    if (getLexer().is(AsmToken::Comma)) {
+      Lex();
+      if (getLexer().is(TypeStartToken)) {
+        Lex();
+        if (getParser().ParseIdentifier(TypeName))
+          return TokError("expected identifier in directive");
+
+        if (getLexer().is(AsmToken::Comma)) {
+          Lex();
+
+          if (getParser().ParseAbsoluteExpression(Size))
+            return true;
+
+          if (Size <= 0)
+            return TokError("section size must be positive");
+        }
+      }
+    }
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  unsigned Flags = 0;
+  for (unsigned i = 0; i < FlagsStr.size(); i++) {
+    switch (FlagsStr[i]) {
+    case 'a':
+      Flags |= MCSectionELF::SHF_ALLOC;
+      break;
+    case 'x':
+      Flags |= MCSectionELF::SHF_EXECINSTR;
+      break;
+    case 'w':
+      Flags |= MCSectionELF::SHF_WRITE;
+      break;
+    case 'M':
+      Flags |= MCSectionELF::SHF_MERGE;
+      break;
+    case 'S':
+      Flags |= MCSectionELF::SHF_STRINGS;
+      break;
+    case 'T':
+      Flags |= MCSectionELF::SHF_TLS;
+      break;
+    case 'c':
+      Flags |= MCSectionELF::XCORE_SHF_CP_SECTION;
+      break;
+    case 'd':
+      Flags |= MCSectionELF::XCORE_SHF_DP_SECTION;
+      break;
+    default:
+      return TokError("unknown flag");
+    }
+  }
+
+  unsigned Type = MCSectionELF::SHT_NULL;
+  if (!TypeName.empty()) {
+    if (TypeName == "init_array")
+      Type = MCSectionELF::SHT_INIT_ARRAY;
+    else if (TypeName == "fini_array")
+      Type = MCSectionELF::SHT_FINI_ARRAY;
+    else if (TypeName == "preinit_array")
+      Type = MCSectionELF::SHT_PREINIT_ARRAY;
+    else if (TypeName == "nobits")
+      Type = MCSectionELF::SHT_NOBITS;
+    else if (TypeName == "progbits")
+      Type = MCSectionELF::SHT_PROGBITS;
+    else
+      return TokError("unknown section type");
+  }
+
+  SectionKind Kind = (Flags & MCSectionELF::SHF_EXECINSTR)
+                       ?
SectionKind::getText() + : SectionKind::getDataRel(); + getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, + Flags, Kind, false)); + return false; +} + +bool ELFAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { + int64_t Value; + if (getParser().ParseAbsoluteExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + // FIXME: Add proper MC support. + if (getContext().getAsmInfo().hasLEB128()) { + if (DirName[1] == 's') + getStreamer().EmitRawText("\t.sleb128\t" + Twine(Value)); + else + getStreamer().EmitRawText("\t.uleb128\t" + Twine(Value)); + return false; + } + // FIXME: This shouldn't be an error! + return TokError("LEB128 not supported yet"); +} + +bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { + const MCSection *PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection != NULL) + getStreamer().SwitchSection(PreviousSection); + + return false; +} + namespace llvm { MCAsmParserExtension *createELFAsmParser() { diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index bee30641c7fcd..70295efc613ca 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -12,19 +12,26 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmParser.h" using namespace llvm; -MCAsmParser::MCAsmParser() { +MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) { } MCAsmParser::~MCAsmParser() { } +void MCAsmParser::setTargetParser(TargetAsmParser &P) { + assert(!TargetParser && "Target parser is already initialized!"); + TargetParser = &P; + TargetParser->Initialize(*this); +} + const AsmToken &MCAsmParser::getTok() { return getLexer().getTok(); } -bool MCAsmParser::TokError(const char *Msg) { +bool MCAsmParser::TokError(const Twine &Msg) { Error(getLexer().getLoc(), Msg); return true; } @@ -34,8 +41,4 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) { return ParseExpression(Res, L); } -/// getStartLoc - Get the location of the first token of this operand. 
-SMLoc MCParsedAsmOperand::getStartLoc() const { return SMLoc(); } -SMLoc MCParsedAsmOperand::getEndLoc() const { return SMLoc(); } - diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/TargetAsmParser.cpp index 05760c96cc658..8d43c21f4bc96 100644 --- a/lib/MC/MCParser/TargetAsmParser.cpp +++ b/lib/MC/MCParser/TargetAsmParser.cpp @@ -11,7 +11,7 @@ using namespace llvm; TargetAsmParser::TargetAsmParser(const Target &T) - : TheTarget(T) + : TheTarget(T), AvailableFeatures(0) { } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 573f2a3530ee9..3e9d02ea5ae73 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -15,7 +15,8 @@ #include <cstdlib> using namespace llvm; -MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context), CurSection(0) { +MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), CurSection(0), + PrevSection(0) { } MCStreamer::~MCStreamer() { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 7ca09511bdebd..cffabfadb3165 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -769,7 +769,7 @@ public: IsPCRel = 1; FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) + Target.getConstant()); - FixedValue += 1 << Log2Size; + FixedValue += 1ULL << Log2Size; } else { FixedValue = 0; } diff --git a/lib/MC/Makefile b/lib/MC/Makefile index a661fa6f40804..bf8b7c0e78318 100644 --- a/lib/MC/Makefile +++ b/lib/MC/Makefile @@ -10,7 +10,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMMC BUILD_ARCHIVE := 1 -PARALLEL_DIRS := MCParser +PARALLEL_DIRS := MCParser MCDisassembler include $(LEVEL)/Makefile.common diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 6804766b28957..eeb2b9675f4b5 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -12,41 +12,552 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "WinCOFFObjectWriter" + #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCSectionCOFF.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" + +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +#include "llvm/System/TimeValue.h" + +#include "../Target/X86/X86FixupKinds.h" + +#include <cstdio> + using namespace llvm; namespace { +typedef llvm::SmallString<COFF::NameSize> name; + +enum AuxiliaryType { + ATFunctionDefinition, + ATbfAndefSymbol, + ATWeakExternal, + ATFile, + ATSectionDefinition +}; + +struct AuxSymbol { + AuxiliaryType AuxType; + COFF::Auxiliary Aux; +}; + +class COFFSymbol { +public: + COFF::symbol Data; + + typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols; + + name Name; + size_t Index; + AuxiliarySymbols Aux; + COFFSymbol *Other; + + MCSymbolData const *MCData; + + COFFSymbol(llvm::StringRef name, size_t index); + size_t size() const; + void set_name_offset(uint32_t Offset); +}; + +// This class contains staging data for a COFF relocation entry. 
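+// On disk each entry is 10 bytes (COFF::RelocationSize): a 4 byte virtual
+// address, a 4 byte symbol table index, and a 2 byte type; WriteRelocation
+// below emits exactly those fields.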
+struct COFFRelocation { + COFF::relocation Data; + COFFSymbol *Symb; + + COFFRelocation() : Symb(NULL) {} + static size_t size() { return COFF::RelocationSize; } +}; + +typedef std::vector<COFFRelocation> relocations; + +class COFFSection { +public: + COFF::section Header; + + std::string Name; + size_t Number; + MCSectionData const *MCData; + COFFSymbol *Symb; + relocations Relocations; + + COFFSection(llvm::StringRef name, size_t Index); + static size_t size(); +}; + +// This class holds the COFF string table. +class StringTable { + typedef llvm::StringMap<size_t> map; + map Map; + + void update_length(); +public: + std::vector<char> Data; + + StringTable(); + size_t size() const; + size_t insert(llvm::StringRef String); +}; + +class WinCOFFObjectWriter : public MCObjectWriter { +public: + + typedef std::vector<COFFSymbol*> symbols; + typedef std::vector<COFFSection*> sections; + + typedef StringMap<COFFSymbol *> name_symbol_map; + typedef StringMap<COFFSection *> name_section_map; + + typedef DenseMap<MCSymbolData const *, COFFSymbol *> symbol_map; + typedef DenseMap<MCSectionData const *, COFFSection *> section_map; + + // Root level file contents. + bool Is64Bit; + COFF::header Header; + sections Sections; + symbols Symbols; + StringTable Strings; + + // Maps used during object file creation. + section_map SectionMap; + symbol_map SymbolMap; + + WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit); + ~WinCOFFObjectWriter(); + + COFFSymbol *createSymbol(llvm::StringRef Name); + COFFSection *createSection(llvm::StringRef Name); + + void InitCOFFEntity(COFFSymbol &Symbol); + void InitCOFFEntity(COFFSection &Section); + + template <typename object_t, typename list_t> + object_t *createCOFFEntity(llvm::StringRef Name, list_t &List); + + void DefineSection(MCSectionData const &SectionData); + void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler); + + bool ExportSection(COFFSection *S); + bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm); + + // Entity writing methods. + + void WriteFileHeader(const COFF::header &Header); + void WriteSymbol(const COFFSymbol *S); + void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S); + void WriteSectionHeader(const COFF::section &S); + void WriteRelocation(const COFF::relocation &R); + + // MCObjectWriter interface implementation. 
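+  // (The assembler drives these in order: ExecutePostLayoutBinding once
+  // layout is final, RecordRelocation for each fixup that needs a relocation,
+  // and finally WriteObject to emit the file.)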
+ + void ExecutePostLayoutBinding(MCAssembler &Asm); + + void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); +}; +} + +static inline void write_uint32_le(void *Data, uint32_t const &Value) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data); + Ptr[0] = (Value & 0x000000FF) >> 0; + Ptr[1] = (Value & 0x0000FF00) >> 8; + Ptr[2] = (Value & 0x00FF0000) >> 16; + Ptr[3] = (Value & 0xFF000000) >> 24; +} + +static inline void write_uint16_le(void *Data, uint16_t const &Value) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data); + Ptr[0] = (Value & 0x00FF) >> 0; + Ptr[1] = (Value & 0xFF00) >> 8; +} + +static inline void write_uint8_le(void *Data, uint8_t const &Value) { + uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data); + Ptr[0] = (Value & 0xFF) >> 0; +} - class WinCOFFObjectWriter : public MCObjectWriter { - public: - WinCOFFObjectWriter(raw_ostream &OS); +//------------------------------------------------------------------------------ +// Symbol class implementation + +COFFSymbol::COFFSymbol(llvm::StringRef name, size_t index) + : Name(name.begin(), name.end()), Index(-1) + , Other(NULL), MCData(NULL) { + memset(&Data, 0, sizeof(Data)); +} + +size_t COFFSymbol::size() const { + return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize); +} + +// In the case that the name does not fit within 8 bytes, the offset +// into the string table is stored in the last 4 bytes instead, leaving +// the first 4 bytes as 0. +void COFFSymbol::set_name_offset(uint32_t Offset) { + write_uint32_le(Data.Name + 0, 0); + write_uint32_le(Data.Name + 4, Offset); +} + +//------------------------------------------------------------------------------ +// Section class implementation + +COFFSection::COFFSection(llvm::StringRef name, size_t Index) + : Name(name), Number(Index + 1) + , MCData(NULL), Symb(NULL) { + memset(&Header, 0, sizeof(Header)); +} + +size_t COFFSection::size() { + return COFF::SectionSize; +} + +//------------------------------------------------------------------------------ +// StringTable class implementation + +/// Write the length of the string table into Data. +/// The length of the string table includes uint32 length header. +void StringTable::update_length() { + write_uint32_le(&Data.front(), Data.size()); +} + +StringTable::StringTable() { + // The string table data begins with the length of the entire string table + // including the length header. Allocate space for this header. + Data.resize(4); +} + +size_t StringTable::size() const { + return Data.size(); +} + +/// Add String to the table iff it is not already there. +/// @returns the index into the string table where the string is now located. +size_t StringTable::insert(llvm::StringRef String) { + map::iterator i = Map.find(String); + + if (i != Map.end()) + return i->second; + + size_t Offset = Data.size(); - // MCObjectWriter interface implementation. + // Insert string data into string table. + Data.insert(Data.end(), String.begin(), String.end()); + Data.push_back('\0'); - void ExecutePostLayoutBinding(MCAssembler &Asm); + // Put a reference to it in the map. + Map[String] = Offset; - void RecordRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue); + // Update the internal length field. 
+ update_length(); - void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); - }; + return Offset; } -WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS) - : MCObjectWriter(OS, true) { +//------------------------------------------------------------------------------ +// WinCOFFObjectWriter class implementation + +WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) + : MCObjectWriter(OS, true) + , Is64Bit(is64Bit) { + memset(&Header, 0, sizeof(Header)); + + Is64Bit ? Header.Machine = COFF::IMAGE_FILE_MACHINE_AMD64 + : Header.Machine = COFF::IMAGE_FILE_MACHINE_I386; +} + +WinCOFFObjectWriter::~WinCOFFObjectWriter() { + for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I) + delete *I; + for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I) + delete *I; +} + +COFFSymbol *WinCOFFObjectWriter::createSymbol(llvm::StringRef Name) { + return createCOFFEntity<COFFSymbol>(Name, Symbols); +} + +COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) { + return createCOFFEntity<COFFSection>(Name, Sections); +} + +/// This function initializes a symbol by entering its name into the string +/// table if it is too long to fit in the symbol table header. +void WinCOFFObjectWriter::InitCOFFEntity(COFFSymbol &S) { + if (S.Name.size() > COFF::NameSize) { + size_t StringTableEntry = Strings.insert(S.Name.c_str()); + + S.set_name_offset(StringTableEntry); + } else + memcpy(S.Data.Name, S.Name.c_str(), S.Name.size()); +} + +/// This function initializes a section by entering its name into the string +/// table if it is too long to fit in the section table header. +void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) { + if (S.Name.size() > COFF::NameSize) { + size_t StringTableEntry = Strings.insert(S.Name.c_str()); + + // FIXME: Why is this number 999999? This number is never mentioned in the + // spec. I'm assuming this is due to the printed value needing to fit into + // the S.Header.Name field. In which case why not 9999999 (7 9's instead of + // 6)? The spec does not state if this entry should be null terminated in + // this case, and thus this seems to be the best way to do it. I think I + // just solved my own FIXME... + if (StringTableEntry > 999999) + report_fatal_error("COFF string table is greater than 999999 bytes."); + + sprintf(S.Header.Name, "/%d", (unsigned)StringTableEntry); + } else + memcpy(S.Header.Name, S.Name.c_str(), S.Name.size()); +} + +/// A template used to lookup or create a symbol/section, and initialize it if +/// needed. +template <typename object_t, typename list_t> +object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name, + list_t &List) { + object_t *Object = new object_t(Name, List.size()); + + InitCOFFEntity(*Object); + + List.push_back(Object); + + return Object; +} + +/// This function takes a section data object from the assembler +/// and creates the associated COFF section staging object. +void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) { + // FIXME: Not sure how to verify this (at least in a debug build). 
+  MCSectionCOFF const &Sec =
+    static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+  COFFSection *coff_section = createSection(Sec.getSectionName());
+  COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+  coff_section->Symb = coff_symbol;
+  coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+  coff_symbol->Data.SectionNumber = coff_section->Number;
+
+  // In this case the auxiliary symbol is a Section Definition.
+  coff_symbol->Aux.resize(1);
+  memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+  coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+  coff_symbol->Aux[0].Aux.SectionDefinition.Number = coff_section->Number;
+  coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+  coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+  uint32_t &Characteristics = coff_section->Header.Characteristics;
+  switch (SectionData.getAlignment()) {
+  case 1:    Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES;    break;
+  case 2:    Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES;    break;
+  case 4:    Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES;    break;
+  case 8:    Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES;    break;
+  case 16:   Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES;   break;
+  case 32:   Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES;   break;
+  case 64:   Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES;   break;
+  case 128:  Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES;  break;
+  case 256:  Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES;  break;
+  case 512:  Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES;  break;
+  case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+  case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+  case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+  case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+  default:
+    llvm_unreachable("unsupported section alignment");
+  }
+
+  // Bind internal COFF section to MC section.
+  coff_section->MCData = &SectionData;
+  SectionMap[&SectionData] = coff_section;
+}
+
+/// This function takes a symbol data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+                                       MCAssembler &Assembler) {
+  COFFSymbol *coff_symbol = createSymbol(SymbolData.getSymbol().getName());
+
+  coff_symbol->Data.Type         = (SymbolData.getFlags() & 0x0000FFFF) >>  0;
+  coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+  // If no storage class was specified in the streamer, define it here.
+  if (coff_symbol->Data.StorageClass == 0) {
+    bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+    coff_symbol->Data.StorageClass =
+      external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+  }
+
+  if (SymbolData.getFlags() & COFF::SF_WeakReference) {
+    coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+    const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+    // FIXME: This assert message isn't very good.
+    assert(Value->getKind() == MCExpr::SymbolRef &&
+           "Value must be a SymbolRef!");
+
+    const MCSymbolRefExpr *SymbolRef =
+      static_cast<const MCSymbolRefExpr *>(Value);
+
+    const MCSymbolData &OtherSymbolData =
+      Assembler.getSymbolData(SymbolRef->getSymbol());
+
+    // FIXME: This assert message isn't very good.
+ assert(SymbolMap.find(&OtherSymbolData) != SymbolMap.end() && + "OtherSymbolData must be in the symbol map!"); + + coff_symbol->Other = SymbolMap[&OtherSymbolData]; + + // Setup the Weak External auxiliary symbol. + coff_symbol->Aux.resize(1); + memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0])); + coff_symbol->Aux[0].AuxType = ATWeakExternal; + coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0; + coff_symbol->Aux[0].Aux.WeakExternal.Characteristics = + COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY; + } + + // Bind internal COFF symbol to MC symbol. + coff_symbol->MCData = &SymbolData; + SymbolMap[&SymbolData] = coff_symbol; +} + +bool WinCOFFObjectWriter::ExportSection(COFFSection *S) { + return (S->Header.Characteristics + & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0; +} + +bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData, + MCAssembler &Asm) { + // This doesn't seem to be right. Strings referred to from the .data section + // need symbols so they can be linked to code in the .text section right? + + // return Asm.isSymbolLinkerVisible (&SymbolData); + + // For now, all symbols are exported, the linker will sort it out for us. + return true; +} + +//------------------------------------------------------------------------------ +// entity writing methods + +void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) { + WriteLE16(Header.Machine); + WriteLE16(Header.NumberOfSections); + WriteLE32(Header.TimeDateStamp); + WriteLE32(Header.PointerToSymbolTable); + WriteLE32(Header.NumberOfSymbols); + WriteLE16(Header.SizeOfOptionalHeader); + WriteLE16(Header.Characteristics); +} + +void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) { + WriteBytes(StringRef(S->Data.Name, COFF::NameSize)); + WriteLE32(S->Data.Value); + WriteLE16(S->Data.SectionNumber); + WriteLE16(S->Data.Type); + Write8(S->Data.StorageClass); + Write8(S->Data.NumberOfAuxSymbols); + WriteAuxiliarySymbols(S->Aux); +} + +void WinCOFFObjectWriter::WriteAuxiliarySymbols( + const COFFSymbol::AuxiliarySymbols &S) { + for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end(); + i != e; ++i) { + switch(i->AuxType) { + case ATFunctionDefinition: + WriteLE32(i->Aux.FunctionDefinition.TagIndex); + WriteLE32(i->Aux.FunctionDefinition.TotalSize); + WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber); + WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction); + WriteZeros(sizeof(i->Aux.FunctionDefinition.unused)); + break; + case ATbfAndefSymbol: + WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1)); + WriteLE16(i->Aux.bfAndefSymbol.Linenumber); + WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2)); + WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction); + WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3)); + break; + case ATWeakExternal: + WriteLE32(i->Aux.WeakExternal.TagIndex); + WriteLE32(i->Aux.WeakExternal.Characteristics); + WriteZeros(sizeof(i->Aux.WeakExternal.unused)); + break; + case ATFile: + WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName), + sizeof(i->Aux.File.FileName))); + break; + case ATSectionDefinition: + WriteLE32(i->Aux.SectionDefinition.Length); + WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations); + WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers); + WriteLE32(i->Aux.SectionDefinition.CheckSum); + WriteLE16(i->Aux.SectionDefinition.Number); + Write8(i->Aux.SectionDefinition.Selection); + WriteZeros(sizeof(i->Aux.SectionDefinition.unused)); + break; + } + } +} + +void WinCOFFObjectWriter::WriteSectionHeader(const 
COFF::section &S) { + WriteBytes(StringRef(S.Name, COFF::NameSize)); + + WriteLE32(S.VirtualSize); + WriteLE32(S.VirtualAddress); + WriteLE32(S.SizeOfRawData); + WriteLE32(S.PointerToRawData); + WriteLE32(S.PointerToRelocations); + WriteLE32(S.PointerToLineNumbers); + WriteLE16(S.NumberOfRelocations); + WriteLE16(S.NumberOfLineNumbers); + WriteLE32(S.Characteristics); +} + +void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) { + WriteLE32(R.VirtualAddress); + WriteLE32(R.SymbolTableIndex); + WriteLE16(R.Type); } //////////////////////////////////////////////////////////////////////////////// // MCObjectWriter interface implementations void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + // "Define" each section & symbol. This creates section & symbol + // entries in the staging area and gives them their final indexes. + + for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++) + DefineSection(*i); + + for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), + e = Asm.symbol_end(); i != e; i++) { + if (ExportSymbol(*i, Asm)) + DefineSymbol(*i, Asm); + } } void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, @@ -55,17 +566,209 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { + assert(Target.getSymA() != NULL && "Relocation must reference a symbol!"); + + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + + MCSectionData const *SectionData = Fragment->getParent(); + + // Mark this symbol as requiring an entry in the symbol table. + assert(SectionMap.find(SectionData) != SectionMap.end() && + "Section must already have been defined in ExecutePostLayoutBinding!"); + assert(SymbolMap.find(&A_SD) != SymbolMap.end() && + "Symbol must already have been defined in ExecutePostLayoutBinding!"); + + COFFSection *coff_section = SectionMap[SectionData]; + COFFSymbol *coff_symbol = SymbolMap[&A_SD]; + + if (Target.getSymB()) { + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + + FixedValue = Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(&B_SD); + + // In the case where we have SymbA and SymB, we just need to store the delta + // between the two symbols. Update FixedValue to account for the delta, and + // skip recording the relocation. + return; + } else { + FixedValue = Target.getConstant(); + } + + COFFRelocation Reloc; + + Reloc.Data.SymbolTableIndex = 0; + Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); + Reloc.Symb = coff_symbol; + + Reloc.Data.VirtualAddress += Fixup.getOffset(); + + switch (Fixup.getKind()) { + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 + : COFF::IMAGE_REL_I386_REL32; + // FIXME: Can anyone explain what this does other than adjust for the size + // of the offset? + FixedValue += 4; + break; + case FK_Data_4: + Reloc.Data.Type = Is64Bit ? 
COFF::IMAGE_REL_AMD64_ADDR32 + : COFF::IMAGE_REL_I386_DIR32; + break; + case FK_Data_8: + if (Is64Bit) + Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64; + else + llvm_unreachable("unsupported relocation type"); + break; + default: + llvm_unreachable("unsupported relocation type"); + } + + coff_section->Relocations.push_back(Reloc); } void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) { + // Assign symbol and section indexes and offsets. + + Header.NumberOfSymbols = 0; + + for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) { + COFFSymbol *coff_symbol = *i; + MCSymbolData const *SymbolData = coff_symbol->MCData; + + coff_symbol->Index = Header.NumberOfSymbols++; + + // Update section number & offset for symbols that have them. + if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) { + COFFSection *coff_section = SectionMap[SymbolData->Fragment->getParent()]; + + coff_symbol->Data.SectionNumber = coff_section->Number; + coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment) + + SymbolData->Offset; + } + + // Update auxiliary symbol info. + coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size(); + Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols; + } + + // Fixup weak external references. + for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) { + COFFSymbol *symb = *i; + + if (symb->Other != NULL) { + assert(symb->Aux.size() == 1 && + "Symbol must contain one aux symbol!"); + assert(symb->Aux[0].AuxType == ATWeakExternal && + "Symbol's aux symbol must be a Weak External!"); + symb->Aux[0].Aux.WeakExternal.TagIndex = symb->Other->Index; + } + } + + // Assign file offsets to COFF object file structures. + + unsigned offset = 0; + + offset += COFF::HeaderSize; + offset += COFF::SectionSize * Asm.size(); + + Header.NumberOfSections = Sections.size(); + + for (MCAssembler::const_iterator i = Asm.begin(), + e = Asm.end(); + i != e; i++) { + COFFSection *Sec = SectionMap[i]; + + Sec->Header.SizeOfRawData = Layout.getSectionFileSize(i); + + if (ExportSection(Sec)) { + Sec->Header.PointerToRawData = offset; + + offset += Sec->Header.SizeOfRawData; + } + + if (Sec->Relocations.size() > 0) { + Sec->Header.NumberOfRelocations = Sec->Relocations.size(); + Sec->Header.PointerToRelocations = offset; + + offset += COFF::RelocationSize * Sec->Relocations.size(); + + for (relocations::iterator cr = Sec->Relocations.begin(), + er = Sec->Relocations.end(); + cr != er; cr++) { + (*cr).Data.SymbolTableIndex = (*cr).Symb->Index; + } + } + + assert(Sec->Symb->Aux.size() == 1 && "Section's symbol must have one aux!"); + AuxSymbol &Aux = Sec->Symb->Aux[0]; + assert(Aux.AuxType == ATSectionDefinition && + "Section's symbol's aux symbol must be a Section Definition!"); + Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData; + Aux.Aux.SectionDefinition.NumberOfRelocations = + Sec->Header.NumberOfRelocations; + Aux.Aux.SectionDefinition.NumberOfLinenumbers = + Sec->Header.NumberOfLineNumbers; + } + + Header.PointerToSymbolTable = offset; + + Header.TimeDateStamp = sys::TimeValue::now().toEpochTime(); + + // Write it all to disk... 
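// The offset accounting above mirrors the on-disk layout of a COFF object:
// file header, section table, then each section's raw data and relocations,
// with the symbol table (and string table) at the end. A schematic version of
// the same bookkeeping; the struct and constants are illustrative (20-byte
// header, 40-byte section header, 10-byte relocation, per the COFF spec), and
// "has raw data" is simplified here to a nonzero size.
#include <cstdint>
#include <vector>

struct SectionLayout {
  uint32_t RawDataSize, NumRelocations;
  uint32_t PointerToRawData, PointerToRelocations;
};

static uint32_t layoutObjectFile(std::vector<SectionLayout> &Sections) {
  uint32_t Offset = 20 + 40 * (uint32_t)Sections.size();
  for (size_t i = 0, e = Sections.size(); i != e; ++i) {
    SectionLayout &S = Sections[i];
    S.PointerToRawData = S.RawDataSize ? Offset : 0;
    Offset += S.RawDataSize;
    S.PointerToRelocations = S.NumRelocations ? Offset : 0;
    Offset += 10 * S.NumRelocations;
  }
  return Offset; // This is what becomes Header.PointerToSymbolTable.
}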
+ WriteFileHeader(Header); + + { + sections::iterator i, ie; + MCAssembler::const_iterator j, je; + + for (i = Sections.begin(), ie = Sections.end(); i != ie; i++) + WriteSectionHeader((*i)->Header); + + for (i = Sections.begin(), ie = Sections.end(), + j = Asm.begin(), je = Asm.end(); + (i != ie) && (j != je); i++, j++) { + if ((*i)->Header.PointerToRawData != 0) { + assert(OS.tell() == (*i)->Header.PointerToRawData && + "Section::PointerToRawData is insane!"); + + Asm.WriteSectionData(j, Layout, this); + } + + if ((*i)->Relocations.size() > 0) { + assert(OS.tell() == (*i)->Header.PointerToRelocations && + "Section::PointerToRelocations is insane!"); + + for (relocations::const_iterator k = (*i)->Relocations.begin(), + ke = (*i)->Relocations.end(); + k != ke; k++) { + WriteRelocation(k->Data); + } + } else + assert((*i)->Header.PointerToRelocations == 0 && + "Section::PointerToRelocations is insane!"); + } + } + + assert(OS.tell() == Header.PointerToSymbolTable && + "Header::PointerToSymbolTable is insane!"); + + for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) + WriteSymbol(*i); + + OS.write((char const *)&Strings.Data.front(), Strings.Data.size()); } //------------------------------------------------------------------------------ // WinCOFFObjectWriter factory function namespace llvm { - MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) { - return new WinCOFFObjectWriter(OS); + MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) { + return new WinCOFFObjectWriter(OS, is64Bit); } } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 1030cdb28d2cd..8a194bff21513 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -18,27 +18,34 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" +#include "llvm/ADT/StringMap.h" + #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define dbg_notimpl(x) \ - do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \ - abort(); } while (false); - namespace { class WinCOFFStreamer : public MCObjectStreamer { public: + MCSymbol const *CurSymbol; + WinCOFFStreamer(MCContext &Context, TargetAsmBackend &TAB, MCCodeEmitter &CE, raw_ostream &OS); + void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, bool External); + // MCStreamer interface virtual void EmitLabel(MCSymbol *Symbol); @@ -52,18 +59,18 @@ public: virtual void EndCOFFSymbolDef(); virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment); + unsigned ByteAlignment); virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size,unsigned ByteAlignment); + unsigned Size,unsigned ByteAlignment); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment); + uint64_t Size, unsigned ByteAlignment); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); - virtual void EmitValue(const MCExpr *Value, unsigned Size, + virtual void 
EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace); virtual void EmitGPRel32Value(const MCExpr *Value); virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, - unsigned ValueSize, unsigned MaxBytesToEmit); + unsigned ValueSize, unsigned MaxBytesToEmit); virtual void EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit); virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value); @@ -78,96 +85,224 @@ WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, TargetAsmBackend &TAB, MCCodeEmitter &CE, raw_ostream &OS) - : MCObjectStreamer(Context, TAB, OS, &CE) { + : MCObjectStreamer(Context, TAB, OS, &CE) + , CurSymbol(NULL) { +} + +void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, bool External) { + assert(!Symbol->isInSection() && "Symbol must not already have a section!"); + + std::string SectionName(".bss$linkonce"); + SectionName.append(Symbol->getName().begin(), Symbol->getName().end()); + + MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol); + + unsigned Characteristics = + COFF::IMAGE_SCN_LNK_COMDAT | + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST; + + const MCSection *Section = MCStreamer::getContext().getCOFFSection( + SectionName, Characteristics, Selection, SectionKind::getBSS()); + + MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section); + + if (SectionData.getAlignment() < ByteAlignment) + SectionData.setAlignment(ByteAlignment); + + SymbolData.setExternal(External); + + Symbol->setSection(*Section); + + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData); + + SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData)); } // MCStreamer interface void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { + // TODO: This is copied almost exactly from the MachOStreamer. Consider + // merging into MCObjectStreamer? + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(CurSection && "Cannot emit before setting section!"); + + Symbol->setSection(*CurSection); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *DF = getOrCreateDataFragment(); + + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(DF); + SD.setOffset(DF->getContents().size()); } void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { - dbg_notimpl("Flag = " << Flag); + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as MachOStreamer. Consider merging into + // MCObjectStreamer. 
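// EmitLabel above pins a symbol to the current end of a data fragment; its
// eventual address is the fragment's layout offset plus that recorded offset
// (the same sum appears later in WinCOFFObjectWriter::WriteObject).
// Schematically, with plain structs standing in for MCDataFragment and
// MCSymbolData; these names are illustrative only.
#include <cstdint>
#include <vector>

struct Fragment {
  std::vector<uint8_t> Contents;
  uint64_t LayoutOffset; // Assigned later, during layout.
};

struct Label {
  Fragment *F;
  uint64_t Offset; // Offset of the label within its fragment.
};

static Label emitLabel(Fragment &F) {
  Label L = { &F, F.Contents.size() }; // Points at the next byte emitted.
  return L;
}

static uint64_t labelAddress(const Label &L) {
  return L.F->LayoutOffset + L.Offset;
}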
+ getAssembler().getOrCreateSymbolData(*Symbol); + AddValueSymbols(Value); + Symbol->setVariableValue(Value); } void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + switch (Attribute) { + case MCSA_WeakReference: + getAssembler().getOrCreateSymbolData(*Symbol).modifyFlags( + COFF::SF_WeakReference, + COFF::SF_WeakReference); + break; + + case MCSA_Global: + getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true); + break; + + default: + llvm_unreachable("unsupported attribute"); + break; + } } void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { - dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = "<< DescValue); + llvm_unreachable("not implemented"); } void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { + assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls " + "to BeginCOFFSymbolDef!"); + CurSymbol = Symbol; } void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { + assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); + assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in " + "the first byte!"); + + getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags( + StorageClass << COFF::SF_ClassShift, + COFF::SF_ClassMask); } void WinCOFFStreamer::EmitCOFFSymbolType(int Type) { + assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); + assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 " + "bytes"); + + getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags( + Type << COFF::SF_TypeShift, + COFF::SF_TypeMask); } void WinCOFFStreamer::EndCOFFSymbolDef() { + assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); + CurSymbol = NULL; } void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value); + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) { + unsigned ByteAlignment) { + AddCommonSymbol(Symbol, Size, ByteAlignment, true); } void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + AddCommonSymbol(Symbol, Size, 1, false); } void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size,unsigned ByteAlignment) { - MCSectionCOFF const *SectionCOFF = - static_cast<MCSectionCOFF const *>(Section); - - dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << - Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " - << ByteAlignment); + unsigned Size,unsigned ByteAlignment) { + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - MCSectionCOFF const *SectionCOFF = - static_cast<MCSectionCOFF const *>(Section); - - dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << - Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " - << ByteAlignment); + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? 
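// EmitCOFFSymbolType and EmitCOFFSymbolStorageClass above stash their values
// in the MCSymbolData flags word, and DefineSymbol in the object writer
// unpacks them again: the COFF type in bits 0-15, the storage class in bits
// 16-23. A self-contained sketch of that scheme; the shift and mask values
// are read off the unpacking code above, not taken from llvm/Support/COFF.h.
#include <cstdint>

static const uint32_t TypeMask = 0x0000FFFF, ClassMask = 0x00FF0000;
static const unsigned TypeShift = 0, ClassShift = 16;

static uint32_t packSymbolFlags(uint16_t Type, uint8_t StorageClass) {
  return ((uint32_t)Type << TypeShift) | ((uint32_t)StorageClass << ClassShift);
}

static void unpackSymbolFlags(uint32_t Flags, uint16_t &Type,
                              uint8_t &StorageClass) {
  Type = (uint16_t)((Flags & TypeMask) >> TypeShift);
  StorageClass = (uint8_t)((Flags & ClassMask) >> ClassShift);
}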
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); } void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { + unsigned AddrSpace) { + assert(AddrSpace == 0 && "Address space must be 0!"); + + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + MCDataFragment *DF = getOrCreateDataFragment(); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. + for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->addFixup(MCFixup::Create(DF->getContents().size(), + AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } } void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) { - dbg_notimpl("Value = '" << *Value); + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment, - int64_t Value, - unsigned ValueSize, - unsigned MaxBytesToEmit) { + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); } void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0) { + unsigned MaxBytesToEmit) { + // TODO: This is copied exactly from the MachOStreamer. Consider merging into + // MCObjectStreamer? + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. 
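// Both alignment emitters above only record an MCAlignFragment; the number of
// pad bytes is decided at layout time. The arithmetic is the usual round-up,
// with MaxBytesToEmit acting as a cap: if the required padding exceeds the
// cap, nothing is emitted. A sketch under those assumptions:
#include <cstdint>

static uint64_t paddingBytes(uint64_t Offset, uint64_t ByteAlignment,
                             uint64_t MaxBytesToEmit) {
  // Same convention as the streamer: 0 means "up to one full alignment unit".
  if (MaxBytesToEmit == 0)
    MaxBytesToEmit = ByteAlignment;
  uint64_t Pad = (ByteAlignment - Offset % ByteAlignment) % ByteAlignment;
  return Pad <= MaxBytesToEmit ? Pad : 0;
}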
+ if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); } void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset, - unsigned char Value = 0) { - dbg_notimpl("Offset = '" << *Offset << "', Value = " << Value); + unsigned char Value) { + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitFileDirective(StringRef Filename) { @@ -176,11 +311,24 @@ void WinCOFFStreamer::EmitFileDirective(StringRef Filename) { } void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo, - StringRef Filename) { - dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'"); + StringRef Filename) { + llvm_unreachable("not implemented"); } void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) { + for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i) + if (Instruction.getOperand(i).isExpr()) + AddValueSymbols(Instruction.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + MCInstFragment *Fragment = + new MCInstFragment(Instruction, getCurrentSectionData()); + + raw_svector_ostream VecOS(Fragment->getCode()); + + getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS, + Fragment->getFixups()); } void WinCOFFStreamer::Finish() { @@ -192,7 +340,10 @@ namespace llvm MCStreamer *createWinCOFFStreamer(MCContext &Context, TargetAsmBackend &TAB, MCCodeEmitter &CE, - raw_ostream &OS) { - return new WinCOFFStreamer(Context, TAB, CE, OS); + raw_ostream &OS, + bool RelaxAll) { + WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS); + S->getAssembler().setRelaxAll(RelaxAll); + return S; } } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 2e78557011331..b87ddf9c95b58 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -153,6 +153,7 @@ readExponent(StringRef::iterator begin, StringRef::iterator end) value += absExponent * 10; if (absExponent >= overlargeExponent) { absExponent = overlargeExponent; + p = end; /* outwit assert below */ break; } absExponent = value; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 262fa42ab2ced..8a212a291f24d 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -2123,15 +2123,16 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, char *BufPtr = Buffer+65; uint64_t N; - if (Signed) { + if (!Signed) { + N = getZExtValue(); + } else { int64_t I = getSExtValue(); - if (I < 0) { + if (I >= 0) { + N = I; + } else { Str.push_back('-'); - I = -I; + N = -(uint64_t)I; } - N = I; - } else { - N = getZExtValue(); } while (N) { diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 366d2f799211d..0c70a402654e5 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMSupport circular_raw_ostream.cpp CommandLine.cpp ConstantRange.cpp + CrashRecoveryContext.cpp Debug.cpp DeltaAlgorithm.cpp DAGDeltaAlgorithm.cpp @@ -23,7 +24,6 @@ add_llvm_library(LLVMSupport PluginLoader.cpp PrettyStackTrace.cpp Regex.cpp - SlowOperationInformer.cpp SmallPtrSet.cpp SmallVector.cpp SourceMgr.cpp diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index 2746f7aaaa5e8..8ef3785f53318 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -21,6 +21,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Constants.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/raw_ostream.h" @@ -38,7 +39,7 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { /// Initialize a range to hold the single specified value. /// -ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) {} +ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {} ConstantRange::ConstantRange(const APInt &L, const APInt &U) : Lower(L), Upper(U) { @@ -202,14 +203,12 @@ bool ConstantRange::contains(const APInt &V) const { } /// contains - Return true if the argument is a subset of this range. -/// Two equal set contain each other. The empty set is considered to be -/// contained by all other sets. +/// Two equal sets contain each other. The empty set contained by all other +/// sets. /// bool ConstantRange::contains(const ConstantRange &Other) const { - if (isFullSet()) return true; - if (Other.isFullSet()) return false; - if (Other.isEmptySet()) return true; - if (isEmptySet()) return false; + if (isFullSet() || Other.isEmptySet()) return true; + if (isEmptySet() || Other.isFullSet()) return false; if (!isWrappedSet()) { if (Other.isWrappedSet()) @@ -235,46 +234,6 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const { return ConstantRange(Lower - Val, Upper - Val); } - -// intersect1Wrapped - This helper function is used to intersect two ranges when -// it is known that LHS is wrapped and RHS isn't. -// -ConstantRange -ConstantRange::intersect1Wrapped(const ConstantRange &LHS, - const ConstantRange &RHS) { - assert(LHS.isWrappedSet() && !RHS.isWrappedSet()); - - // Check to see if we overlap on the Left side of RHS... - // - if (RHS.Lower.ult(LHS.Upper)) { - // We do overlap on the left side of RHS, see if we overlap on the right of - // RHS... - if (RHS.Upper.ugt(LHS.Lower)) { - // Ok, the result overlaps on both the left and right sides. See if the - // resultant interval will be smaller if we wrap or not... - // - if (LHS.getSetSize().ult(RHS.getSetSize())) - return LHS; - else - return RHS; - - } else { - // No overlap on the right, just on the left. - return ConstantRange(RHS.Lower, LHS.Upper); - } - } else { - // We don't overlap on the left side of RHS, see if we overlap on the right - // of RHS... - if (RHS.Upper.ugt(LHS.Lower)) { - // Simple overlap... - return ConstantRange(LHS.Lower, RHS.Upper); - } else { - // No overlap... - return ConstantRange(LHS.getBitWidth(), false); - } - } -} - /// intersectWith - Return the range that results from the intersection of this /// range with another range. 
The resultant range is guaranteed to include all /// elements contained in both input ranges, and to have the smallest possible @@ -486,7 +445,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { assert(SrcTySize > DstTySize && "Not a value truncation"); APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize)); if (isFullSet() || getSetSize().ugt(Size)) - return ConstantRange(DstTySize); + return ConstantRange(DstTySize, /*isFullSet=*/true); APInt L = Lower; L.trunc(DstTySize); APInt U = Upper; U.trunc(DstTySize); @@ -539,6 +498,27 @@ ConstantRange::add(const ConstantRange &Other) const { } ConstantRange +ConstantRange::sub(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize(); + APInt NewLower = getLower() - Other.getLower(); + APInt NewUpper = getUpper() - Other.getUpper() + 1; + if (NewLower == NewUpper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + ConstantRange X = ConstantRange(NewLower, NewUpper); + if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y)) + // We've wrapped, therefore, full set. + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return X; +} + +ConstantRange ConstantRange::multiply(const ConstantRange &Other) const { // TODO: If either operand is a single element and the multiply is known to // be non-wrapping, round the result min and max value to the appropriate @@ -616,40 +596,42 @@ ConstantRange::udiv(const ConstantRange &RHS) const { } ConstantRange -ConstantRange::shl(const ConstantRange &Amount) const { - if (isEmptySet()) - return *this; +ConstantRange::shl(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); - APInt min = getUnsignedMin() << Amount.getUnsignedMin(); - APInt max = getUnsignedMax() << Amount.getUnsignedMax(); + APInt min = getUnsignedMin().shl(Other.getUnsignedMin()); + APInt max = getUnsignedMax().shl(Other.getUnsignedMax()); // there's no overflow! 
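// A quick check of the interval arithmetic in sub above, with 8-bit values
// standing in for APInt: for X = {0} and Y = {0, ..., 9}, X - Y is
// {-9, ..., 0}, i.e. the half-open range [-9, 1).
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Lx = 0, Ux = 1, Ly = 0, Uy = 10;
  uint8_t NewLower = (uint8_t)(Lx - Uy + 1); // -9 mod 256 == 247
  uint8_t NewUpper = (uint8_t)(Ux - Ly);     // 1
  assert(NewLower == (uint8_t)-9 && NewUpper == 1);
  return 0;
}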
APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros()); - if (Zeros.uge(Amount.getUnsignedMax())) - return ConstantRange(min, max); + if (Zeros.ugt(Other.getUnsignedMax())) + return ConstantRange(min, max + 1); // FIXME: implement the other tricky cases - return ConstantRange(getBitWidth()); + return ConstantRange(getBitWidth(), /*isFullSet=*/true); } ConstantRange -ConstantRange::ashr(const ConstantRange &Amount) const { - if (isEmptySet()) - return *this; +ConstantRange::lshr(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + + APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()); + APInt min = getUnsignedMin().lshr(Other.getUnsignedMax()); + if (min == max + 1) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); - APInt min = getUnsignedMax().ashr(Amount.getUnsignedMin()); - APInt max = getUnsignedMin().ashr(Amount.getUnsignedMax()); - return ConstantRange(min, max); + return ConstantRange(min, max + 1); } -ConstantRange -ConstantRange::lshr(const ConstantRange &Amount) const { - if (isEmptySet()) - return *this; - - APInt min = getUnsignedMax().lshr(Amount.getUnsignedMin()); - APInt max = getUnsignedMin().lshr(Amount.getUnsignedMax()); - return ConstantRange(min, max); +ConstantRange ConstantRange::inverse() const { + if (isFullSet()) { + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + } else if (isEmptySet()) { + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + } + return ConstantRange(Upper, Lower); } /// print - Print out the bounds to a stream... @@ -668,5 +650,3 @@ void ConstantRange::print(raw_ostream &OS) const { void ConstantRange::dump() const { print(dbgs()); } - - diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp new file mode 100644 index 0000000000000..49258ede83c1d --- /dev/null +++ b/lib/Support/CrashRecoveryContext.cpp @@ -0,0 +1,204 @@ +//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Config/config.h" +#include "llvm/System/Mutex.h" +#include "llvm/System/ThreadLocal.h" +#include <setjmp.h> +#include <cstdio> +using namespace llvm; + +namespace { + +struct CrashRecoveryContextImpl; + +static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext; + +struct CrashRecoveryContextImpl { + CrashRecoveryContext *CRC; + std::string Backtrace; + ::jmp_buf JumpBuffer; + volatile unsigned Failed : 1; + +public: + CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC), + Failed(false) { + CurrentContext.set(this); + } + ~CrashRecoveryContextImpl() { + CurrentContext.erase(); + } + + void HandleCrash() { + // Eliminate the current context entry, to avoid re-entering in case the + // cleanup code crashes. + CurrentContext.erase(); + + assert(!Failed && "Crash recovery context already failed!"); + Failed = true; + + // FIXME: Stash the backtrace. + + // Jump back to the RunSafely we were called under. 
+    longjmp(JumpBuffer, 1);
+  }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+  delete CRCI;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+  const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+  if (!CRCI)
+    return 0;
+
+  return CRCI->CRC;
+}
+
+#ifdef LLVM_ON_WIN32
+
+// FIXME: No real Win32 implementation currently.
+
+void CrashRecoveryContext::Enable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = true;
+}
+
+void CrashRecoveryContext::Disable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (!gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = false;
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context we
+// simply disable crash recovery and raise the signal again.
+
+#include <signal.h>
+
+static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+  // Look up the current thread local recovery object.
+  const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+  if (!CRCI) {
+    // We didn't find a crash recovery context -- this means either we got a
+    // signal on a thread we didn't expect it on, the application got a signal
+    // outside of a crash recovery context, or something else went horribly
+    // wrong.
+    //
+    // Disable crash recovery and raise the signal again. The assumption here is
+    // that the enclosing application will terminate soon, and we won't want to
+    // attempt crash recovery again.
+    //
+    // This call of Disable isn't thread safe, but it doesn't actually matter.
+    CrashRecoveryContext::Disable();
+    raise(Signal);
+  }
+
+  // Unblock the signal we received.
+  sigset_t SigMask;
+  sigemptyset(&SigMask);
+  sigaddset(&SigMask, Signal);
+  sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+  if (CRCI)
+    const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = true;
+
+  // Set up the signal handler.
+  struct sigaction Handler;
+  Handler.sa_handler = CrashRecoverySignalHandler;
+  Handler.sa_flags = 0;
+  sigemptyset(&Handler.sa_mask);
+
+  for (unsigned i = 0; i != NumSignals; ++i) {
+    sigaction(Signals[i], &Handler, &PrevActions[i]);
+  }
+}
+
+void CrashRecoveryContext::Disable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (!gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = false;
+
+  // Restore the previous signal handlers.
+  for (unsigned i = 0; i != NumSignals; ++i)
+    sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+  // If crash recovery is disabled, do nothing.
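// A hypothetical caller of RunSafely; DoRiskyWork and RunGuarded are made-up
// names, and this is a usage sketch rather than part of the patch.
#include "llvm/Support/CrashRecoveryContext.h"

static void DoRiskyWork(void *Data) {
  // ... code that may crash ...
  (void)Data;
}

static bool RunGuarded(void *Data) {
  llvm::CrashRecoveryContext::Enable();
  llvm::CrashRecoveryContext CRC;
  // Returns false if a signal fired and the handler longjmp'd back here.
  return CRC.RunSafely(DoRiskyWork, Data);
}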
+ if (gCrashRecoveryEnabled) { + assert(!Impl && "Crash recovery context already initialized!"); + CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this); + Impl = CRCI; + + if (setjmp(CRCI->JumpBuffer) != 0) { + return false; + } + } + + Fn(UserData); + return true; +} + +void CrashRecoveryContext::HandleCrash() { + CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; + assert(CRCI && "Crash recovery context never initialized!"); + CRCI->HandleCrash(); +} + +const std::string &CrashRecoveryContext::getBacktrace() const { + CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl; + assert(CRC && "Crash recovery context never initialized!"); + assert(CRC->Failed && "No crash was detected!"); + return CRC->Backtrace; +} diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 7e7ca9debe9a7..0b7af3e5905be 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -18,8 +18,19 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" #include "llvm/System/Threading.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Config/config.h" #include <cassert> #include <cstdlib> + +#if defined(HAVE_UNISTD_H) +# include <unistd.h> +#endif +#if defined(_MSC_VER) +# include <io.h> +# include <fcntl.h> +#endif + using namespace llvm; using namespace std; @@ -39,19 +50,26 @@ void llvm::remove_fatal_error_handler() { ErrorHandler = 0; } -void llvm::report_fatal_error(const char *reason) { - report_fatal_error(Twine(reason)); +void llvm::report_fatal_error(const char *Reason) { + report_fatal_error(Twine(Reason)); } -void llvm::report_fatal_error(const std::string &reason) { - report_fatal_error(Twine(reason)); +void llvm::report_fatal_error(const std::string &Reason) { + report_fatal_error(Twine(Reason)); } -void llvm::report_fatal_error(const Twine &reason) { - if (!ErrorHandler) { - errs() << "LLVM ERROR: " << reason << "\n"; +void llvm::report_fatal_error(const Twine &Reason) { + if (ErrorHandler) { + ErrorHandler(ErrorHandlerUserData, Reason.str()); } else { - ErrorHandler(ErrorHandlerUserData, reason.str()); + // Blast the result out to stderr. We don't try hard to make sure this + // succeeds (e.g. handling EINTR) and we can't use errs() here because + // raw ostreams can call report_fatal_error. + SmallVector<char, 64> Buffer; + raw_svector_ostream OS(Buffer); + OS << "LLVM ERROR: " << Reason << "\n"; + StringRef MessageStr = OS.str(); + (void)::write(2, MessageStr.data(), MessageStr.size()); } // If we reached here, we are failing ungracefully. Run the interrupt handlers diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index b8dca334da494..29b5952208874 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -23,6 +23,37 @@ using namespace llvm; //===----------------------------------------------------------------------===// +// FoldingSetNodeIDRef Implementation + +/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, +/// used to lookup the node in the FoldingSetImpl. +unsigned FoldingSetNodeIDRef::ComputeHash() const { + // This is adapted from SuperFastHash by Paul Hsieh. + unsigned Hash = static_cast<unsigned>(Size); + for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) { + unsigned Data = *BP; + Hash += Data & 0xFFFF; + unsigned Tmp = ((Data >> 16) << 11) ^ Hash; + Hash = (Hash << 16) ^ Tmp; + Hash += Hash >> 11; + } + + // Force "avalanching" of final 127 bits. 
+ Hash ^= Hash << 3; + Hash += Hash >> 5; + Hash ^= Hash << 4; + Hash += Hash >> 17; + Hash ^= Hash << 25; + Hash += Hash >> 6; + return Hash; +} + +bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const { + if (Size != RHS.Size) return false; + return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0; +} + +//===----------------------------------------------------------------------===// // FoldingSetNodeID Implementation /// Add* - Add various data types to Bit data. @@ -104,31 +135,19 @@ void FoldingSetNodeID::AddString(StringRef String) { /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to /// lookup the node in the FoldingSetImpl. unsigned FoldingSetNodeID::ComputeHash() const { - // This is adapted from SuperFastHash by Paul Hsieh. - unsigned Hash = static_cast<unsigned>(Bits.size()); - for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) { - unsigned Data = *BP; - Hash += Data & 0xFFFF; - unsigned Tmp = ((Data >> 16) << 11) ^ Hash; - Hash = (Hash << 16) ^ Tmp; - Hash += Hash >> 11; - } - - // Force "avalanching" of final 127 bits. - Hash ^= Hash << 3; - Hash += Hash >> 5; - Hash ^= Hash << 4; - Hash += Hash >> 17; - Hash ^= Hash << 25; - Hash += Hash >> 6; - return Hash; + return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); } /// operator== - Used to compare two nodes to each other. /// bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ - if (Bits.size() != RHS.Bits.size()) return false; - return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; + return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size()); +} + +/// operator== - Used to compare two nodes to each other. +/// +bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const { + return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS; } /// Intern - Copy this node's data to a memory region allocated from the @@ -168,10 +187,9 @@ static void **GetBucketPtr(void *NextInBucketPtr) { /// GetBucketFor - Hash the specified node ID and return the hash bucket for /// the specified ID. -static void **GetBucketFor(const FoldingSetNodeID &ID, - void **Buckets, unsigned NumBuckets) { +static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) { // NumBuckets is always a power of 2. - unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1); + unsigned BucketNum = Hash & (NumBuckets-1); return Buckets + BucketNum; } @@ -219,7 +237,7 @@ void FoldingSetImpl::GrowHashTable() { NumNodes = 0; // Walk the old buckets, rehashing nodes into their new place. - FoldingSetNodeID ID; + FoldingSetNodeID TempID; for (unsigned i = 0; i != OldNumBuckets; ++i) { void *Probe = OldBuckets[i]; if (!Probe) continue; @@ -229,9 +247,10 @@ void FoldingSetImpl::GrowHashTable() { NodeInBucket->SetNextInBucket(0); // Insert the node into the new bucket, after recomputing the hash. 
- GetNodeProfile(ID, NodeInBucket); - InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets)); - ID.clear(); + InsertNode(NodeInBucket, + GetBucketFor(ComputeNodeHash(NodeInBucket, TempID), + Buckets, NumBuckets)); + TempID.clear(); } } @@ -245,19 +264,18 @@ FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - void **Bucket = GetBucketFor(ID, Buckets, NumBuckets); + void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets); void *Probe = *Bucket; InsertPos = 0; - FoldingSetNodeID OtherID; + FoldingSetNodeID TempID; while (Node *NodeInBucket = GetNextPtr(Probe)) { - GetNodeProfile(OtherID, NodeInBucket); - if (OtherID == ID) + if (NodeEquals(NodeInBucket, ID, TempID)) return NodeInBucket; + TempID.clear(); Probe = NodeInBucket->getNextInBucket(); - OtherID.clear(); } // Didn't find the node, return null with the bucket as the InsertPos. @@ -273,9 +291,8 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { // Do we need to grow the hashtable? if (NumNodes+1 > NumBuckets*2) { GrowHashTable(); - FoldingSetNodeID ID; - GetNodeProfile(ID, N); - InsertPos = GetBucketFor(ID, Buckets, NumBuckets); + FoldingSetNodeID TempID; + InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets); } ++NumNodes; @@ -341,7 +358,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) { /// instead. FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { FoldingSetNodeID ID; - GetNodeProfile(ID, N); + GetNodeProfile(N, ID); void *IP; if (Node *E = FindNodeOrInsertPos(ID, IP)) return E; diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index a99ab2f30df0e..3c8a10849d149 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -72,7 +72,7 @@ asm(".desc ___crashreporter_info__, 0x10"); /// CrashHandler - This callback is run if a fatal signal is delivered to the /// process, it prints the pretty stack trace. -static void CrashHandler(void *Cookie) { +static void CrashHandler(void *) { #ifndef __APPLE__ // On non-apple systems, just emit the crash stack trace to stderr. PrintCurStackTrace(errs()); @@ -89,7 +89,8 @@ static void CrashHandler(void *Cookie) { #ifndef HAVE_CRASHREPORTERCLIENT_H __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str()); #else - CRSetCrashLogMessage(std::string(TmpStr.str()).c_str()); + // Cast to void to avoid warning. + (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str()); #endif errs() << TmpStr.str(); } diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp deleted file mode 100644 index b4e9430e5fdfe..0000000000000 --- a/lib/Support/SlowOperationInformer.cpp +++ /dev/null @@ -1,67 +0,0 @@ -//===-- SlowOperationInformer.cpp - Keep the user informed ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SlowOperationInformer class for the LLVM debugger. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/SlowOperationInformer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/System/Alarm.h" -#include <sstream> -#include <cassert> -using namespace llvm; - -SlowOperationInformer::SlowOperationInformer(const std::string &Name) - : OperationName(Name), LastPrintAmount(0) { - sys::SetupAlarm(1); -} - -SlowOperationInformer::~SlowOperationInformer() { - sys::TerminateAlarm(); - if (LastPrintAmount) { - // If we have printed something, make _sure_ we print the 100% amount, and - // also print a newline. - outs() << std::string(LastPrintAmount, '\b') << "Progress " - << OperationName << ": 100% \n"; - } -} - -/// progress - Clients should periodically call this method when they are in -/// an exception-safe state. The Amount variable should indicate how far -/// along the operation is, given in 1/10ths of a percent (in other words, -/// Amount should range from 0 to 1000). -bool SlowOperationInformer::progress(unsigned Amount) { - int status = sys::AlarmStatus(); - if (status == -1) { - outs() << "\n"; - LastPrintAmount = 0; - return true; - } - - // If we haven't spent enough time in this operation to warrant displaying the - // progress bar, don't do so yet. - if (status == 0) - return false; - - // Delete whatever we printed last time. - std::string ToPrint = std::string(LastPrintAmount, '\b'); - - std::ostringstream OS; - OS << "Progress " << OperationName << ": " << Amount/10; - if (unsigned Rem = Amount % 10) - OS << "." << Rem << "%"; - else - OS << "% "; - - LastPrintAmount = OS.str().size(); - outs() << ToPrint+OS.str(); - outs().flush(); - return false; -} diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp index 2e17af864155a..a89f14957635e 100644 --- a/lib/Support/SmallVector.cpp +++ b/lib/Support/SmallVector.cpp @@ -18,7 +18,7 @@ using namespace llvm; /// on POD-like datatypes and is out of line to reduce code duplication. void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { size_t CurSizeBytes = size_in_bytes(); - size_t NewCapacityInBytes = 2 * capacity_in_bytes(); + size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow. if (NewCapacityInBytes < MinSizeInBytes) NewCapacityInBytes = MinSizeInBytes; diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 7d5f65af28422..e32ab74a2d4c4 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -44,7 +44,7 @@ Enabled("stats", cl::desc("Enable statistics output from program")); namespace { /// StatisticInfo - This class is used in a ManagedStatic so that it is created -/// on demand (when the first statistic is bumped) and destroyed only when +/// on demand (when the first statistic is bumped) and destroyed only when /// llvm_shutdown is called. We print statistics from the destructor. class StatisticInfo { std::vector<const Statistic*> Stats; @@ -52,7 +52,7 @@ class StatisticInfo { friend void llvm::PrintStatistics(raw_ostream &OS); public: ~StatisticInfo(); - + void addStatistic(const Statistic *S) { Stats.push_back(S); } @@ -71,7 +71,7 @@ void Statistic::RegisterStatistic() { if (!Initialized) { if (Enabled) StatInfo->addStatistic(this); - + sys::MemoryFence(); // Remember we have been registered. 
Initialized = true; @@ -84,7 +84,7 @@ struct NameCompare { bool operator()(const Statistic *LHS, const Statistic *RHS) const { int Cmp = std::strcmp(LHS->getName(), RHS->getName()); if (Cmp != 0) return Cmp < 0; - + // Secondary key is the description. return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0; } @@ -112,7 +112,7 @@ void llvm::PrintStatistics(raw_ostream &OS) { MaxNameLen = std::max(MaxNameLen, (unsigned)std::strlen(Stats.Stats[i]->getName())); } - + // Sort the fields by name. std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare()); @@ -120,7 +120,7 @@ void llvm::PrintStatistics(raw_ostream &OS) { OS << "===" << std::string(73, '-') << "===\n" << " ... Statistics Collected ...\n" << "===" << std::string(73, '-') << "===\n\n"; - + // Print all of the statistics. for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) { std::string CountStr = utostr(Stats.Stats[i]->getValue()); @@ -129,7 +129,7 @@ void llvm::PrintStatistics(raw_ostream &OS) { << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ') << " - " << Stats.Stats[i]->getDesc() << "\n"; } - + OS << '\n'; // Flush the output stream. OS.flush(); diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ca0f518a88b62..46f26b242aac3 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/APInt.h" +#include <bitset> using namespace llvm; @@ -30,14 +31,14 @@ static bool ascii_isdigit(char x) { /// compare_lower - Compare strings, ignoring case. int StringRef::compare_lower(StringRef RHS) const { for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { - char LHC = ascii_tolower(Data[I]); - char RHC = ascii_tolower(RHS.Data[I]); + unsigned char LHC = ascii_tolower(Data[I]); + unsigned char RHC = ascii_tolower(RHS.Data[I]); if (LHC != RHC) return LHC < RHC ? -1 : 1; } if (Length == RHS.Length) - return 0; + return 0; return Length < RHS.Length ? -1 : 1; } @@ -58,10 +59,10 @@ int StringRef::compare_numeric(StringRef RHS) const { break; } } - return Data[I] < RHS.Data[I] ? -1 : 1; + return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1; } if (Length == RHS.Length) - return 0; + return 0; return Length < RHS.Length ? -1 : 1; } @@ -153,11 +154,15 @@ size_t StringRef::rfind(StringRef Str) const { /// find_first_of - Find the first character in the string that is in \arg /// Chars, or npos if not found. /// -/// Note: O(size() * Chars.size()) +/// Note: O(size() + Chars.size()) StringRef::size_type StringRef::find_first_of(StringRef Chars, size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + for (size_type i = min(From, Length), e = Length; i != e; ++i) - if (Chars.find(Data[i]) != npos) + if (CharBits.test((unsigned char)Data[i])) return i; return npos; } @@ -174,11 +179,15 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { /// find_first_not_of - Find the first character in the string that is not /// in the string \arg Chars, or npos if not found. 
/// -/// Note: O(size() * Chars.size()) +/// Note: O(size() + Chars.size()) StringRef::size_type StringRef::find_first_not_of(StringRef Chars, size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + for (size_type i = min(From, Length), e = Length; i != e; ++i) - if (Chars.find(Data[i]) == npos) + if (!CharBits.test((unsigned char)Data[i])) return i; return npos; } diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp index 299032f187156..c8b260c2e3dd9 100644 --- a/lib/Support/SystemUtils.cpp +++ b/lib/Support/SystemUtils.cpp @@ -49,6 +49,16 @@ sys::Path llvm::FindExecutable(const std::string &ExeName, Result.appendComponent(ExeName); if (Result.canExecute()) return Result; + // If the path is absolute (and it usually is), call FindProgramByName to + // allow it to try platform-specific logic, such as appending a .exe suffix + // on Windows. Don't do this if we somehow have a relative path, because + // we don't want to go searching the PATH and accidentally find an unrelated + // version of the program. + if (Result.isAbsolute()) { + Result = sys::Program::FindProgramByName(Result.str()); + if (!Result.empty()) + return Result; + } } return sys::Path(); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 6a70449b56dc7..3a95b65e69000 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -221,121 +221,238 @@ const char *Triple::getArchNameForAssembler() { // -void Triple::Parse() const { - assert(!isInitialized() && "Invalid parse call."); - - StringRef ArchName = getArchName(); - StringRef VendorName = getVendorName(); - StringRef OSName = getOSName(); - +Triple::ArchType Triple::ParseArch(StringRef ArchName) { if (ArchName.size() == 4 && ArchName[0] == 'i' && ArchName[2] == '8' && ArchName[3] == '6' && ArchName[1] - '3' < 6) // i[3-9]86 - Arch = x86; + return x86; else if (ArchName == "amd64" || ArchName == "x86_64") - Arch = x86_64; + return x86_64; else if (ArchName == "bfin") - Arch = bfin; + return bfin; else if (ArchName == "pic16") - Arch = pic16; + return pic16; else if (ArchName == "powerpc") - Arch = ppc; + return ppc; else if ((ArchName == "powerpc64") || (ArchName == "ppu")) - Arch = ppc64; + return ppc64; else if (ArchName == "mblaze") - Arch = mblaze; + return mblaze; else if (ArchName == "arm" || ArchName.startswith("armv") || ArchName == "xscale") - Arch = arm; + return arm; else if (ArchName == "thumb" || ArchName.startswith("thumbv")) - Arch = thumb; + return thumb; else if (ArchName.startswith("alpha")) - Arch = alpha; + return alpha; else if (ArchName == "spu" || ArchName == "cellspu") - Arch = cellspu; + return cellspu; else if (ArchName == "msp430") - Arch = msp430; + return msp430; else if (ArchName == "mips" || ArchName == "mipsallegrex") - Arch = mips; + return mips; else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || ArchName == "psp") - Arch = mipsel; + return mipsel; else if (ArchName == "sparc") - Arch = sparc; + return sparc; else if (ArchName == "sparcv9") - Arch = sparcv9; + return sparcv9; else if (ArchName == "s390x") - Arch = systemz; + return systemz; else if (ArchName == "tce") - Arch = tce; + return tce; else if (ArchName == "xcore") - Arch = xcore; + return xcore; else - Arch = UnknownArch; - - - // Handle some exceptional cases where the OS / environment components are - // stuck into the vendor field. 
- if (StringRef(getTriple()).count('-') == 1) { - StringRef VendorName = getVendorName(); - - if (VendorName.startswith("mingw32")) { // 'i386-mingw32', etc. - Vendor = PC; - OS = MinGW32; - return; - } - - // arm-elf is another example, but we don't currently parse anything about - // the environment. - } + return UnknownArch; +} +Triple::VendorType Triple::ParseVendor(StringRef VendorName) { if (VendorName == "apple") - Vendor = Apple; + return Apple; else if (VendorName == "pc") - Vendor = PC; + return PC; else - Vendor = UnknownVendor; + return UnknownVendor; +} +Triple::OSType Triple::ParseOS(StringRef OSName) { if (OSName.startswith("auroraux")) - OS = AuroraUX; + return AuroraUX; else if (OSName.startswith("cygwin")) - OS = Cygwin; + return Cygwin; else if (OSName.startswith("darwin")) - OS = Darwin; + return Darwin; else if (OSName.startswith("dragonfly")) - OS = DragonFly; + return DragonFly; else if (OSName.startswith("freebsd")) - OS = FreeBSD; + return FreeBSD; else if (OSName.startswith("linux")) - OS = Linux; + return Linux; else if (OSName.startswith("lv2")) - OS = Lv2; + return Lv2; else if (OSName.startswith("mingw32")) - OS = MinGW32; + return MinGW32; else if (OSName.startswith("mingw64")) - OS = MinGW64; + return MinGW64; else if (OSName.startswith("netbsd")) - OS = NetBSD; + return NetBSD; else if (OSName.startswith("openbsd")) - OS = OpenBSD; + return OpenBSD; else if (OSName.startswith("psp")) - OS = Psp; + return Psp; else if (OSName.startswith("solaris")) - OS = Solaris; + return Solaris; else if (OSName.startswith("win32")) - OS = Win32; + return Win32; else if (OSName.startswith("haiku")) - OS = Haiku; + return Haiku; else if (OSName.startswith("minix")) - OS = Minix; + return Minix; else - OS = UnknownOS; + return UnknownOS; +} + +void Triple::Parse() const { + assert(!isInitialized() && "Invalid parse call."); + + Arch = ParseArch(getArchName()); + Vendor = ParseVendor(getVendorName()); + OS = ParseOS(getOSName()); assert(isInitialized() && "Failed to initialize!"); } +std::string Triple::normalize(StringRef Str) { + // Parse into components. + SmallVector<StringRef, 4> Components; + for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) { + Last = Str.find('-', First); + Components.push_back(Str.slice(First, Last)); + } + + // If the first component corresponds to a known architecture, preferentially + // use it for the architecture. If the second component corresponds to a + // known vendor, preferentially use it for the vendor, etc. This avoids silly + // component movement when a component parses as (eg) both a valid arch and a + // valid os. + ArchType Arch = UnknownArch; + if (Components.size() > 0) + Arch = ParseArch(Components[0]); + VendorType Vendor = UnknownVendor; + if (Components.size() > 1) + Vendor = ParseVendor(Components[1]); + OSType OS = UnknownOS; + if (Components.size() > 2) + OS = ParseOS(Components[2]); + + // Note which components are already in their final position. These will not + // be moved. + bool Found[3]; + Found[0] = Arch != UnknownArch; + Found[1] = Vendor != UnknownVendor; + Found[2] = OS != UnknownOS; + + // If they are not there already, permute the components into their canonical + // positions by seeing if they parse as a valid architecture, and if so moving + // the component to the architecture position etc. + for (unsigned Pos = 0; Pos != 3; ++Pos) { + if (Found[Pos]) + continue; // Already in the canonical position. 
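// The expected effect of this permutation loop, using the cases cited in the
// comments below (a hypothetical check, assuming this patch is applied):
#include "llvm/ADT/Triple.h"
#include <cassert>

static void checkNormalize() {
  assert(llvm::Triple::normalize("a-b-i386") == "i386-a-b"); // arch to front
  assert(llvm::Triple::normalize("pc-a") == "-pc-a"); // vendor to second slot
  assert(llvm::Triple::normalize("i386-pc-linux") == "i386-pc-linux");
}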
+ + for (unsigned Idx = 0; Idx != Components.size(); ++Idx) { + // Do not reparse any components that already matched. + if (Idx < 3 && Found[Idx]) + continue; + + // Does this component parse as valid for the target position? + bool Valid = false; + StringRef Comp = Components[Idx]; + switch (Pos) { + default: + assert(false && "unexpected component type!"); + case 0: + Arch = ParseArch(Comp); + Valid = Arch != UnknownArch; + break; + case 1: + Vendor = ParseVendor(Comp); + Valid = Vendor != UnknownVendor; + break; + case 2: + OS = ParseOS(Comp); + Valid = OS != UnknownOS; + break; + } + if (!Valid) + continue; // Nope, try the next component. + + // Move the component to the target position, pushing any non-fixed + // components that are in the way to the right. This tends to give + // good results in the common cases of a forgotten vendor component + // or a wrongly positioned environment. + if (Pos < Idx) { + // Insert left, pushing the existing components to the right. For + // example, a-b-i386 -> i386-a-b when moving i386 to the front. + StringRef CurrentComponent(""); // The empty component. + // Replace the component we are moving with an empty component. + std::swap(CurrentComponent, Components[Idx]); + // Insert the component being moved at Pos, displacing any existing + // components to the right. + for (unsigned i = Pos; !CurrentComponent.empty(); ++i) { + // Skip over any fixed components. + while (i < 3 && Found[i]) ++i; + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + } + } else if (Pos > Idx) { + // Push right by inserting empty components until the component at Idx + // reaches the target position Pos. For example, pc-a -> -pc-a when + // moving pc to the second position. + do { + // Insert one empty component at Idx. + StringRef CurrentComponent(""); // The empty component. + for (unsigned i = Idx; i < Components.size(); ++i) { + // Skip over any fixed components. + while (i < 3 && Found[i]) ++i; + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + // If it was placed on top of an empty component then we are done. + if (CurrentComponent.empty()) + break; + } + // The last component was pushed off the end - append it. + if (!CurrentComponent.empty()) + Components.push_back(CurrentComponent); + + // Advance Idx to the component's new position. + while (++Idx < 3 && Found[Idx]) {} + } while (Idx < Pos); // Add more until the final position is reached. + } + assert(Pos < Components.size() && Components[Pos] == Comp && + "Component moved wrong!"); + Found[Pos] = true; + break; + } + } + + // Special case logic goes here. At this point Arch, Vendor and OS have the + // correct values for the computed components. + + // Stick the corrected components back together to form the normalized string. 
+ std::string Normalized; + for (unsigned i = 0, e = Components.size(); i != e; ++i) { + if (i) Normalized += '-'; + Normalized += Components[i]; + } + return Normalized; +} + StringRef Triple::getArchName() const { return StringRef(Data).split('-').first; // Isolate first component } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 8054ae63688c9..dba46df362566 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -19,6 +19,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/System/Signals.h" #include "llvm/ADT/STLExtras.h" #include <cctype> #include <cerrno> @@ -56,13 +57,6 @@ raw_ostream::~raw_ostream() { if (BufferMode == InternalBuffer) delete [] OutBufStart; - - // If there are any pending errors, report them now. Clients wishing - // to avoid report_fatal_error calls should check for errors with - // has_error() and clear the error flag with clear_error() before - // destructing raw_ostream objects which may have errors. - if (Error) - report_fatal_error("IO failure on output stream."); } // An out of line virtual method to provide a home for the class vtable. @@ -143,9 +137,10 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) { } raw_ostream &raw_ostream::operator<<(long long N) { - if (N < 0) { + if (N < 0) { *this << '-'; - N = -N; + // Avoid undefined behavior on INT64_MIN with a cast. + N = -(unsigned long long)N; } return this->operator<<(static_cast<unsigned long long>(N)); @@ -368,7 +363,7 @@ void format_object_base::home() { /// stream should be immediately destroyed; the string will be empty /// if no error occurred. raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, - unsigned Flags) : pos(0) { + unsigned Flags) : Error(false), pos(0) { assert(Filename != 0 && "Filename is null"); // Verify that we don't have both "append" and "excl". assert((!(Flags & F_Excl) || !(Flags & F_Append)) && @@ -376,14 +371,17 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, ErrorInfo.clear(); - // Handle "-" as stdout. + // Handle "-" as stdout. Note that when we do this, we consider ourself + // the owner of stdout. This means that we can do things like close the + // file descriptor when we're done and set the "binary" flag globally. if (Filename[0] == '-' && Filename[1] == 0) { FD = STDOUT_FILENO; // If user requested binary then put stdout into binary mode if // possible. if (Flags & F_Binary) sys::Program::ChangeStdoutToBinary(); - ShouldClose = false; + // Close stdout when we're done, to detect any output errors. + ShouldClose = true; return; } @@ -413,14 +411,22 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, } raw_fd_ostream::~raw_fd_ostream() { - if (FD < 0) return; - flush(); - if (ShouldClose) - while (::close(FD) != 0) - if (errno != EINTR) { - error_detected(); - break; - } + if (FD >= 0) { + flush(); + if (ShouldClose) + while (::close(FD) != 0) + if (errno != EINTR) { + error_detected(); + break; + } + } + + // If there are any pending errors, report them now. Clients wishing + // to avoid report_fatal_error calls should check for errors with + // has_error() and clear the error flag with clear_error() before + // destructing raw_ostream objects which may have errors. 
+  if (has_error())
+    report_fatal_error("IO failure on output stream.");
 }
 
@@ -534,30 +540,24 @@ bool raw_fd_ostream::is_displayed() const {
 }
 
 //===----------------------------------------------------------------------===//
-//  raw_stdout/err_ostream
+//  outs(), errs(), nulls()
 //===----------------------------------------------------------------------===//
 
-// Set buffer settings to model stdout and stderr behavior.
-// Set standard error to be unbuffered by default.
-raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {}
-raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
-                                                        true) {}
-
-// An out of line virtual method to provide a home for the class vtable.
-void raw_stdout_ostream::handle() {}
-void raw_stderr_ostream::handle() {}
-
 /// outs() - This returns a reference to a raw_ostream for standard output.
 /// Use it like: outs() << "foo" << "bar";
 raw_ostream &llvm::outs() {
-  static raw_stdout_ostream S;
+  // Set buffer settings to model stdout behavior.
+  // Delete the file descriptor when the program exits, forcing error
+  // detection. If you don't want this behavior, don't use outs().
+  static raw_fd_ostream S(STDOUT_FILENO, true);
   return S;
 }
 
 /// errs() - This returns a reference to a raw_ostream for standard error.
 /// Use it like: errs() << "foo" << "bar";
 raw_ostream &llvm::errs() {
-  static raw_stderr_ostream S;
+  // Set standard error to be unbuffered by default.
+  static raw_fd_ostream S(STDERR_FILENO, false, true);
   return S;
 }
 
@@ -665,3 +665,34 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
 uint64_t raw_null_ostream::current_pos() const {
   return 0;
 }
+
+//===----------------------------------------------------------------------===//
+//  tool_output_file
+//===----------------------------------------------------------------------===//
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+  : Filename(filename), Keep(false) {
+  // Arrange for the file to be deleted if the process is killed.
+  if (Filename != "-")
+    sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+  // Delete the file if the client hasn't told us not to.
+  if (!Keep && Filename != "-")
+    sys::Path(Filename).eraseFromDisk();
+
+  // Ok, the file is successfully written and closed, or deleted. There's no
+  // further need to clean it up on signals.
+  if (Filename != "-")
+    sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+                                   unsigned Flags)
+  : Installer(filename),
+    OS(filename, ErrorInfo, Flags) {
+  // If open fails, no cleanup is needed.
+  if (!ErrorInfo.empty())
+    Installer.Keep = true;
+}
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 6f6890c06c499..660db492d6b9a 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -70,6 +70,12 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
     if (ErrMsg) *ErrMsg = dlerror();
     return true;
   }
+#ifdef __CYGWIN__
+  // On Cygwin, the handle returned by dlopen(NULL, RTLD_GLOBAL) searches
+  // symbols only in the main executable, so use RTLD_DEFAULT instead.
+ if (Filename == NULL) + H = RTLD_DEFAULT; +#endif if (OpenedHandles == 0) OpenedHandles = new std::vector<void *>(); OpenedHandles->push_back(H); diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp index 1235257b27e20..4445c667d86e9 100644 --- a/lib/System/Path.cpp +++ b/lib/System/Path.cpp @@ -61,7 +61,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) { if (memcmp(magic,"!<arch>\n",8) == 0) return Archive_FileType; break; - + case '\177': if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { if (length >= 18 && magic[17] == 0) @@ -76,11 +76,11 @@ sys::IdentifyFileType(const char *magic, unsigned length) { break; case 0xCA: - if (magic[1] == char(0xFE) && magic[2] == char(0xBA) && + if (magic[1] == char(0xFE) && magic[2] == char(0xBA) && magic[3] == char(0xBE)) { - // This is complicated by an overlap with Java class files. + // This is complicated by an overlap with Java class files. // See the Mach-O section in /usr/share/file/magic for details. - if (length >= 8 && magic[7] < 43) + if (length >= 8 && magic[7] < 43) // FIXME: Universal Binary of any type. return Mach_O_DynamicallyLinkedSharedLib_FileType; } @@ -89,18 +89,18 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case 0xFE: case 0xCE: { uint16_t type = 0; - if (magic[0] == char(0xFE) && magic[1] == char(0xED) && + if (magic[0] == char(0xFE) && magic[1] == char(0xED) && magic[2] == char(0xFA) && magic[3] == char(0xCE)) { /* Native endian */ if (length >= 16) type = magic[14] << 8 | magic[15]; - } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) && + } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) && magic[2] == char(0xED) && magic[3] == char(0xFE)) { /* Reverse endian */ if (length >= 14) type = magic[13] << 8 | magic[12]; } switch (type) { - default: break; - case 1: return Mach_O_Object_FileType; + default: break; + case 1: return Mach_O_Object_FileType; case 2: return Mach_O_Executable_FileType; case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType; case 4: return Mach_O_Core_FileType; @@ -219,38 +219,38 @@ static StringRef getDirnameCharSep(StringRef path, const char *Sep) { "Sep must be a 1-character string literal."); if (path.empty()) return "."; - + // If the path is all slashes, return a single slash. // Otherwise, remove all trailing slashes. - + signed pos = static_cast<signed>(path.size()) - 1; - + while (pos >= 0 && path[pos] == Sep[0]) --pos; - + if (pos < 0) return path[0] == Sep[0] ? Sep : "."; - + // Any slashes left? signed i = 0; - + while (i < pos && path[i] != Sep[0]) ++i; - + if (i == pos) // No slashes? Return "." return "."; - - // There is at least one slash left. Remove all trailing non-slashes. + + // There is at least one slash left. Remove all trailing non-slashes. while (pos >= 0 && path[pos] != Sep[0]) --pos; - + // Remove any trailing slashes. while (pos >= 0 && path[pos] == Sep[0]) --pos; - + if (pos < 0) return path[0] == Sep[0] ? 
Sep : "."; - + return path.substr(0, pos+1); } diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp index 5faf220eb9168..deb04709d829c 100644 --- a/lib/System/RWMutex.cpp +++ b/lib/System/RWMutex.cpp @@ -71,23 +71,9 @@ RWMutexImpl::RWMutexImpl() bzero(rwlock, sizeof(pthread_rwlock_t)); #endif - pthread_rwlockattr_t attr; - - // Initialize the rwlock attributes - int errorcode = pthread_rwlockattr_init(&attr); - assert(errorcode == 0); - -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) - // Make it a process local rwlock - errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); -#endif - // Initialize the rwlock - errorcode = pthread_rwlock_init(rwlock, &attr); - assert(errorcode == 0); - - // Destroy the attributes - errorcode = pthread_rwlockattr_destroy(&attr); + int errorcode = pthread_rwlock_init(rwlock, NULL); + (void)errorcode; assert(errorcode == 0); // Assign the data member diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp index e7054b5281471..f6a55a1c0b9b1 100644 --- a/lib/System/ThreadLocal.cpp +++ b/lib/System/ThreadLocal.cpp @@ -27,6 +27,7 @@ ThreadLocalImpl::ThreadLocalImpl() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);} const void* ThreadLocalImpl::getInstance() { return data; } +void ThreadLocalImpl::removeInstance() { data = 0; } } #else @@ -67,6 +68,10 @@ const void* ThreadLocalImpl::getInstance() { return pthread_getspecific(*key); } +void ThreadLocalImpl::removeInstance() { + setInstance(0); +} + } #elif defined(LLVM_ON_UNIX) diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index bc104a32a3aea..47e4d1ac3c6b5 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -276,20 +276,20 @@ Path::GetCurrentDirectory() { char pathname[MAXPATHLEN]; if (!getcwd(pathname,MAXPATHLEN)) { assert (false && "Could not query current working directory."); - return Path(""); + return Path(); } return Path(pathname); } -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) { struct stat sb; - snprintf(buf, PATH_MAX, "%s//%s", dir, bin); + snprintf(buf, PATH_MAX, "%s/%s", dir, bin); if (realpath(buf, ret) == NULL) return (1); if (stat(buf, &sb) != 0) @@ -334,7 +334,7 @@ getprogpath(char ret[PATH_MAX], const char *bin) free(pv); return (NULL); } -#endif // __FreeBSD__ +#endif // __FreeBSD__ || __NetBSD__ /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. 
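The hunk below extends GetMainExecutable's fallback to NetBSD and Minix. Where the OS offers no /proc/self/exe-style query, the getprogpath scheme from the hunk above resolves argv[0] by hand: canonicalize absolute paths, resolve relative ones against the working directory, and otherwise scan $PATH. Here is a self-contained sketch of the same approach, with hypothetical names (testDir, findProgPath) rather than the patch's actual code, assuming only POSIX realpath/stat/access:

    #include <climits>    // PATH_MAX
    #include <cstdio>     // snprintf, printf
    #include <cstdlib>    // getenv, realpath
    #include <cstring>    // strchr
    #include <string>
    #include <sys/stat.h> // stat, S_ISREG
    #include <unistd.h>   // access, X_OK

    // Is `path` an executable regular file?
    static bool isExecutable(const char *path) {
      struct stat sb;
      return stat(path, &sb) == 0 && S_ISREG(sb.st_mode) &&
             access(path, X_OK) == 0;
    }

    // Canonicalize `dir/bin` into `ret` and check that it is executable; this
    // mirrors test_dir above, including the single-slash separator the patch
    // fixes.
    static bool testDir(char ret[PATH_MAX], const char *dir, const char *bin) {
      char buf[PATH_MAX];
      snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
      return realpath(buf, ret) != NULL && isExecutable(ret);
    }

    // Resolve argv[0] the way getprogpath does: absolute paths directly,
    // relative paths against the working directory, bare names via $PATH.
    static bool findProgPath(char ret[PATH_MAX], const char *argv0) {
      if (argv0[0] == '/')
        return realpath(argv0, ret) != NULL && isExecutable(ret);
      if (strchr(argv0, '/'))
        return testDir(ret, ".", argv0);
      const char *p = getenv("PATH");
      if (!p)
        return false;
      std::string path(p);
      for (size_t start = 0; start <= path.size();) {
        size_t end = path.find(':', start);
        if (end == std::string::npos) end = path.size();
        std::string dir = path.substr(start, end - start);
        if (!dir.empty() && testDir(ret, dir.c_str(), argv0))
          return true;
        start = end + 1;
      }
      return false;
    }

    int main(int argc, char **argv) {
      char resolved[PATH_MAX];
      if (argc > 0 && findProgPath(resolved, argv[0]))
        printf("%s\n", resolved);
      return 0;
    }

The $PATH scan is only trustworthy because bare names are searched last; as the SystemUtils.cpp comment earlier in this patch notes, searching PATH for a name you already resolved risks finding an unrelated program.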
@@ -350,7 +350,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { if (realpath(exe_path, link_path)) return Path(std::string(link_path)); } -#elif defined(__FreeBSD__) +#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix) char exe_path[PATH_MAX]; if (getprogpath(exe_path, argv0) != NULL) @@ -408,7 +408,7 @@ Path::getSuffix() const { std::string::size_type dot = path.rfind('.'); if (dot == std::string::npos || dot < slash) - return StringRef(""); + return StringRef(); else return StringRef(path).substr(dot + 1); } diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 1e74647e5fdcb..7b7c43efc7864 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -182,6 +182,16 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, return false; } +// DontRemoveFileOnSignal - The public API +void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) { + SignalsMutex.acquire(); + std::vector<sys::Path>::reverse_iterator I = + std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename); + if (I != FilesToRemove.rend()) + FilesToRemove.erase(I.base()-1); + SignalsMutex.release(); +} + /// AddSignalHandler - Add a function to be called when a signal is delivered /// to the process. The handler can have a cookie passed to it to identify /// what instance of the handler it is. @@ -253,3 +263,37 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { AddSignalHandler(PrintStackTrace, 0); } + +/***/ + +// On Darwin, raise sends a signal to the main thread instead of the current +// thread. This has the unfortunate effect that assert() and abort() will end up +// bypassing our crash recovery attempts. We work around this for anything in +// the same linkage unit by just defining our own versions of the assert handler +// and abort. 
+ +#ifdef __APPLE__ + +void __assert_rtn(const char *func, + const char *file, + int line, + const char *expr) { + if (func) + fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n", + expr, func, file, line); + else + fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n", + expr, file, line); + abort(); +} + +#include <signal.h> +#include <pthread.h> + +void abort() { + pthread_kill(pthread_self(), SIGABRT); + usleep(1000); + __builtin_trap(); +} + +#endif diff --git a/lib/System/Unix/ThreadLocal.inc b/lib/System/Unix/ThreadLocal.inc index 83d554d3077c7..6769520a6fb66 100644 --- a/lib/System/Unix/ThreadLocal.inc +++ b/lib/System/Unix/ThreadLocal.inc @@ -22,4 +22,5 @@ ThreadLocalImpl::ThreadLocalImpl() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);} const void* ThreadLocalImpl::getInstance() { return data; } +void ThreadLocalImpl::removeInstance() { setInstance(0); } } diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index 379527d4ebf24..4a6dbd3ddf299 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -400,8 +400,10 @@ PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { for (unsigned i = 0; i < path.length(); ++i) status.uniqueID += path[i]; - __int64 ft = *reinterpret_cast<__int64*>(&fi.ftLastWriteTime); - status.modTime.fromWin32Time(ft); + ULARGE_INTEGER ui; + ui.LowPart = fi.ftLastWriteTime.dwLowDateTime; + ui.HighPart = fi.ftLastWriteTime.dwHighDateTime; + status.modTime.fromWin32Time(ui.QuadPart); status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY; fsIsValid = true; @@ -720,7 +722,7 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { bool Path::getMagicNumber(std::string& Magic, unsigned len) const { assert(len < 1024 && "Request for magic string too long"); - char* buf = (char*) alloca(1 + len); + char* buf = reinterpret_cast<char*>(alloca(len)); HANDLE h = CreateFile(path.c_str(), GENERIC_READ, @@ -739,8 +741,7 @@ bool Path::getMagicNumber(std::string& Magic, unsigned len) const { if (!ret || nRead != len) return false; - buf[len] = '\0'; - Magic = buf; + Magic = std::string(buf, len); return true; } @@ -777,8 +778,11 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const { return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: "); } + ULARGE_INTEGER ui; + ui.QuadPart = si.modTime.toWin32Time(); FILETIME ft; - (uint64_t&)ft = si.modTime.toWin32Time(); + ft.dwLowDateTime = ui.LowPart; + ft.dwHighDateTime = ui.HighPart; BOOL ret = SetFileTime(h, NULL, &ft, &ft); DWORD err = GetLastError(); CloseHandle(h); diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc index d6db71ba4f359..2498a26ea99c5 100644 --- a/lib/System/Win32/Signals.inc +++ b/lib/System/Win32/Signals.inc @@ -140,6 +140,20 @@ bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { return false; } +// DontRemoveFileOnSignal - The public API +void sys::DontRemoveFileOnSignal(const sys::Path &Filename) { + if (FilesToRemove == NULL) + return; + + FilesToRemove->push_back(Filename); + std::vector<sys::Path>::reverse_iterator I = + std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename); + if (I != FilesToRemove->rend()) + FilesToRemove->erase(I.base()-1); + + LeaveCriticalSection(&CriticalSection); +} + /// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or /// SIGSEGV) is delivered to the process, print a stack trace and 
then exit. void sys::PrintStackTraceOnErrorSignal() { diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc index c8f7840b00387..b8b933c4d29d9 100644 --- a/lib/System/Win32/ThreadLocal.inc +++ b/lib/System/Win32/ThreadLocal.inc @@ -46,4 +46,8 @@ void ThreadLocalImpl::setInstance(const void* d){ assert(errorcode != 0); } +void ThreadLocalImpl::removeInstance() { + setInstance(0); +} + } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 14825a785649d..271ca44c2b69d 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -30,22 +30,22 @@ class formatted_raw_ostream; namespace ARMCC { // The CondCodes constants map directly to the 4-bit encoding of the // condition field for predicated instructions. - enum CondCodes { - EQ, - NE, - HS, - LO, - MI, - PL, - VS, - VC, - HI, - LS, - GE, - LT, - GT, - LE, - AL + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Carry set >, ==, or unordered + LO, // Carry clear Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Not unordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Greater than Greater than + LE, // Less than or equal <, ==, or unordered + AL // Always (unconditional) Always (unconditional) }; inline static CondCodes getOppositeCondition(CondCodes CC) { @@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } +namespace ARM_MB { + // The Memory Barrier Option constants map directly to the 4-bit encoding of + // the option field for memory barrier operations. + enum MemBOpt { + ST = 14, + ISH = 11, + ISHST = 10, + NSH = 7, + NSHST = 6, + OSH = 3, + OSHST = 2 + }; + + inline static const char *MemBOptToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown memory opetion"); + case ST: return "st"; + case ISH: return "ish"; + case ISHST: return "ishst"; + case NSH: return "nsh"; + case NSHST: return "nshst"; + case OSH: return "osh"; + case OSHST: return "oshst"; + } + } +} // namespace ARM_MB + FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); @@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); +FunctionPass *createARMGlobalMergePass(const TargetLowering* tli); FunctionPass *createARMConstantIslandPass(); FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index fa64d6c2a4b4d..d6a8f19724dc9 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -1,4 +1,4 @@ -//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===// +//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -20,20 +20,6 @@ include "llvm/Target/Target.td" // ARM Subtarget features. 
// -def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", - "ARM v4T">; -def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", - "ARM v5T">; -def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", - "ARM v5TE, v5TEj, v5TExp">; -def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", - "ARM v6">; -def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", - "ARM v6t2">; -def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", - "ARM v7A">; -def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", - "ARM v7M">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", "Enable VFP2 instructions">; def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", @@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", "Enable NEON instructions">; def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", "Enable Thumb2 instructions">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; -def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", +def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", "Enable Thumb2 extract and pack instructions">; +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb / dsb) instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports single precision only">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", // FIXME: Currently, this is only flagged for Cortex-A8. It may be true for // others as well. We should do more benchmarking and confirm one way or // the other. -def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", - "Disable VFP MAC instructions">; +def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", + "Disable VFP MAC instructions">; // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; +// Disable 32-bit to 16-bit narrowing for experimentation. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + + +// ARM architectures. 
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M", + "ARM v6m", + [FeatureNoARM, FeatureDB]>; +def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", + "ARM v6t2", + [FeatureThumb2]>; +def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", + "ARM v7A", + [FeatureThumb2, FeatureNEON, FeatureDB]>; +def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", + "ARM v7M", + [FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +// V6M Processors. +def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>; + // V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, - [ArchV6T2, FeatureThumb2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, - [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>; + [ArchV7A, FeatureHasSlowVMLx, + FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; -def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; -def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; + [ArchV7A, FeatureT2XtPk]>; + +// V7M Processors. +def : ProcNoItin<"cortex-m3", [ArchV7M]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 92a13f1d751ca..db481005b3a4c 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -458,6 +458,7 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before + // For VFP instructions, only the IA and DB modes are valid. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); @@ -477,14 +478,6 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand encodes the // operation in bit 8 and the immediate in bits 0-7. - // - // This is also used for FP load/store multiple ops. The second operand - // encodes the number of registers (or 2 times the number of registers - // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the - // following two sub-modes: - // - // IA - Increment after - // DB - Decrement before /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { @@ -498,17 +491,6 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? 
sub : add; } - /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and - /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { - assert((SubMode == ia || SubMode == db) && - "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 8) | Offset; - } - static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 8) & 0x7); - } - //===--------------------------------------------------------------------===// // Addressing Mode #6 //===--------------------------------------------------------------------===// diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 946f4744f5bbc..6cfd5961149fd 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -17,7 +17,7 @@ #include "ARMBuildAttrs.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" -#include "ARMInstPrinter.h" +#include "AsmPrinter/ARMInstPrinter.h" #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" @@ -47,6 +47,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cctype> @@ -56,6 +57,15 @@ static cl::opt<bool> EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, cl::desc("enable experimental asmprinter gunk in the arm backend")); +namespace llvm { + namespace ARM { + enum DW_ISA { + DW_ISA_ARM_thumb = 1, + DW_ISA_ARM_arm = 2 + }; + } +} + namespace { class ARMAsmPrinter : public AsmPrinter { @@ -80,9 +90,9 @@ namespace { virtual const char *getPassName() const { return "ARM Assembly Printer"; } - + void printInstructionThroughMCStreamer(const MachineInstr *MI); - + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier = 0); @@ -110,8 +120,12 @@ namespace { void printAddrModePCOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier = 0); - void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum, - raw_ostream &O); + void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); + void printMemBOption(const MachineInstr *MI, int OpNum, + raw_ostream &O); + void printShiftImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); @@ -190,12 +204,32 @@ namespace { virtual void EmitInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - + virtual void EmitConstantPool() {} // we emit constant pools customly! virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); + MachineLocation getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; + } + + virtual unsigned getISAEncoding() { + // ARM/Darwin adds ISA to the DWARF info for each function. + if (!Subtarget->isTargetDarwin()) + return 0; + return Subtarget->isThumb() ? 
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + } + MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const; MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; @@ -208,7 +242,7 @@ namespace { EmitMachineConstantPoolValue(MCPV, OS); OutStreamer.EmitRawText(OS.str()); } - + void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV, raw_ostream &O) { switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) { @@ -234,7 +268,7 @@ namespace { // FIXME: Remove this when Darwin transition to @GOT like syntax. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); O << *Sym; - + MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo<MachineModuleInfoMachO>(); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText(OS.str()); } } - + OutStreamer.EmitLabel(CurrentFnSym); } @@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); O << *GetExternalSymbolSymbol(MO.getSymbolName()); - + if (isCallOp && Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) O << "(PLT)"; @@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op, O << getRegisterName(MO1.getReg()); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) - << " "; - + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); } } @@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - if (Modifier && strcmp(Modifier, "submode") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - O << ARM_AM::getAMSubModeStr(Mode); - return; - } else if (Modifier && strcmp(Modifier, "base") == 0) { - // Used for FSTM{D|S} and LSTM{D|S} operations. 
- O << getRegisterName(MO1.getReg()); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { @@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op, O << "#" << lsb << ", #" << width; } +void +ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + //===--------------------------------------------------------------------===// void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op, @@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum, O << getRegisterName(Reg); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) - << " "; - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); } void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, @@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id - + unsigned JTI = MO1.getIndex(); MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); // Can't use EmitLabel until instprinter happens, label comes out in the wrong // order. - O << *JTISymbol << ":\n"; + O << "\n" << *JTISymbol << ":\n"; const char *JTEntryDirective = MAI->getData32bitsDirective(); @@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - + // Can't use EmitLabel until instprinter happens, label comes out in the wrong // order. 
- O << *JTISymbol << ":\n"; + O << "\n" << *JTISymbol << ":\n"; const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, O << MAI->getData8bitsDirective(); else if (HalfWordOffset) O << MAI->getData16bitsDirective(); - + if (ByteOffset || HalfWordOffset) O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2"; else @@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstructionThroughMCStreamer(MI); return; } - + if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) EmitAlignment(2); - + SmallString<128> Str; raw_svector_ostream OS(Str); if (MI->getOpcode() == ARM::DBG_VALUE) { @@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); - + // Make sure the instruction that follows TBB is 2-byte aligned. // FIXME: Constant island pass should insert an "ALIGN" instruction instead. if (MI->getOpcode() == ARM::t2TBB) @@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - const TargetLoweringObjectFileMachO &TLOFMacho = + const TargetLoweringObjectFileMachO &TLOFMacho = static_cast<const TargetLoweringObjectFileMachO &>( getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); @@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } + const MCSection *StaticInitSect = + OutContext.getMachOSection("__TEXT", "__StaticInit", + MCSectionMachO::S_REGULAR | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + OutStreamer.SwitchSection(StaticInitSect); } } @@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { OutStreamer.EmitRawText("\t.eabi_attribute " + Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1"); } - - if (FiniteOnlyFPMath()) + + if (NoInfsFPMath && NoNaNsFPMath) OutStreamer.EmitRawText("\t.eabi_attribute " + Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1"); else @@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // LPC0: // add r0, pc, r0 // This adds the address of LPC0 to r0. - + // Emit the label. // FIXME: MOVE TO SHARED PLACE. unsigned Id = (unsigned)MI->getOperand(2).getImm(); @@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); OutStreamer.EmitLabel(Label); - - + + // Form and emit tha dd. MCInst AddInst; AddInst.setOpcode(ARM::ADDrr); @@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else EmitGlobalConstant(MCPE.Val.ConstVal); - + return; } case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file. 
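The MOVi2pieces case that opens here (and continues in the next hunk) lowers a 32-bit constant that cannot be encoded as a single ARM so_imm, that is, an 8-bit value rotated right by an even amount, into two encodable pieces fed to a MOVi and an ORRri. The following is a toy, brute-force illustration of such a split; it is not LLVM's getSOImmTwoPartFirst/Second implementation, which picks the rotation directly rather than searching:

    #include <cstdint>
    #include <cstdio>

    static uint32_t rotr32(uint32_t v, unsigned amt) {
      amt &= 31;
      return amt ? (v >> amt) | (v << (32 - amt)) : v;
    }

    // An ARM "so_imm" is an 8-bit value rotated right by an even amount.
    // Brute-force test: does v fit inside some even-rotated 8-bit window?
    static bool isSOImm(uint32_t v) {
      for (unsigned rot = 0; rot < 32; rot += 2)
        if ((v & ~rotr32(0xFF, rot)) == 0)
          return true;
      return false;
    }

    // Split v into two so_imm pieces if possible: carve out one rotated
    // 8-bit window and require the remainder to be encodable too.
    static bool splitSOImm(uint32_t v, uint32_t &first, uint32_t &second) {
      for (unsigned rot = 0; rot < 32; rot += 2) {
        uint32_t mask = rotr32(0xFF, rot);
        uint32_t part = v & mask, rest = v & ~mask;
        if (part != 0 && rest != 0 && isSOImm(rest)) {
          first = part;   // materialized with MOVi
          second = rest;  // merged in with ORRri
          return true;
        }
      }
      return false;
    }

    int main() {
      uint32_t lo, hi;
      if (splitSOImm(0x00AB00CDu, lo, hi))
        printf("mov r0, #0x%X\norr r0, r0, #0x%X\n", lo, hi);
      return 0;
    }

Running this prints "mov r0, #0xCD" then "orr r0, r0, #0xAB0000": the two pieces the lowering in this hunk would emit as its MOVi/ORRri pair.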
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); - + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1)); - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); @@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out OutStreamer.EmitInstruction(TmpInst); } - return; + return; } case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. @@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(V1); // lower16(imm) - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + OutStreamer.EmitInstruction(TmpInst); } - + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg TmpInst.addOperand(V2); // upper16(imm) - + // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + OutStreamer.EmitInstruction(TmpInst); } - + return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 49c16f3e07205..3a8bebe0dd247 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -15,9 +15,9 @@ #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" -#include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "ARMGenInstrInfo.inc" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/GlobalValue.h" @@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { llvm_unreachable("Unknown or unset size field for instr!"); case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; @@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. 
-/// -bool -ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - switch (MI.getOpcode()) { - default: break; - case ARM::VMOVS: - case ARM::VMOVD: - case ARM::VMOVDneon: - case ARM::VMOVQ: - case ARM::VMOVQQ : { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } - case ARM::MOVr: - case ARM::MOVr_TC: - case ARM::tMOVr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2gpr: - case ARM::t2MOVr: { - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid ARM MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } - } - - return false; -} - unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Align); // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. - if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) + // certain registers. Just treat it as GPR here. Likewise, rGPR. + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) RC = ARM::GPRRegisterClass; switch (RC->getID()) { @@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) RC = ARM::GPRRegisterClass; switch (RC->getID()) { @@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { unsigned PCLabelId = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *NewCPV = 0; + // FIXME: The below assumes PIC relocation model and that the function + // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and + // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR + // instructions, so that's probably OK, but is PIC always correct when + // we get here? if (ACPV->isGlobalValue()) NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, ARMCP::CPValue, 4); @@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { else if (ACPV->isBlockAddress()) NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, ARMCP::CPBlockAddress, 4); + else if (ACPV->isLSDA()) + NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId, + ARMCP::CPLSDA, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); @@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, Offset = (isSub) ? -Offset : Offset; return Offset == 0; } + +bool ARMBaseInstrInfo:: +AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const { + switch (MI->getOpcode()) { + default: break; + case ARM::CMPri: + case ARM::CMPzri: + case ARM::t2CMPri: + case ARM::t2CMPzri: + SrcReg = MI->getOperand(0).getReg(); + CmpValue = MI->getOperand(1).getImm(); + return true; + } + + return false; +} + +/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so +/// that we can remove a "comparison with zero". +bool ARMBaseInstrInfo:: +ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { + // Conservatively refuse to convert an instruction which isn't in the same BB + // as the comparison. + if (MI->getParent() != CmpInstr->getParent()) + return false; + + // Check that CPSR isn't set between the comparison instruction and the one we + // want to change. 
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI; + --I; + for (; I != E; --I) { + const MachineInstr &Instr = *I; + + for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || !MO.isDef()) continue; + + // This instruction modifies CPSR before the one we want to change. We + // can't do this transformation. + if (MO.getReg() == ARM::CPSR) + return false; + } + } + + // Set the "zero" bit in CPSR. + switch (MI->getOpcode()) { + default: break; + case ARM::ADDri: + case ARM::SUBri: + case ARM::t2ADDri: + case ARM::t2SUBri: + MI->RemoveOperand(5); + MachineInstrBuilder(MI) + .addReg(ARM::CPSR, RegState::Define | RegState::Implicit); + CmpInstr->eraseFromParent(); + return true; + } + + return false; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 89a2db74a75ef..b4f4a33a70adf 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -15,11 +15,12 @@ #define ARMBASEINSTRUCTIONINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { + class ARMSubtarget; + class ARMBaseRegisterInfo; /// ARMII - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -97,44 +98,45 @@ namespace ARMII { // Miscellaneous arithmetic instructions ArithMiscFrm = 12 << FormShift, + SatFrm = 13 << FormShift, // Extend instructions - ExtFrm = 13 << FormShift, + ExtFrm = 14 << FormShift, // VFP formats - VFPUnaryFrm = 14 << FormShift, - VFPBinaryFrm = 15 << FormShift, - VFPConv1Frm = 16 << FormShift, - VFPConv2Frm = 17 << FormShift, - VFPConv3Frm = 18 << FormShift, - VFPConv4Frm = 19 << FormShift, - VFPConv5Frm = 20 << FormShift, - VFPLdStFrm = 21 << FormShift, - VFPLdStMulFrm = 22 << FormShift, - VFPMiscFrm = 23 << FormShift, + VFPUnaryFrm = 15 << FormShift, + VFPBinaryFrm = 16 << FormShift, + VFPConv1Frm = 17 << FormShift, + VFPConv2Frm = 18 << FormShift, + VFPConv3Frm = 19 << FormShift, + VFPConv4Frm = 20 << FormShift, + VFPConv5Frm = 21 << FormShift, + VFPLdStFrm = 22 << FormShift, + VFPLdStMulFrm = 23 << FormShift, + VFPMiscFrm = 24 << FormShift, // Thumb format - ThumbFrm = 24 << FormShift, + ThumbFrm = 25 << FormShift, // Miscelleaneous format - MiscFrm = 25 << FormShift, + MiscFrm = 26 << FormShift, // NEON formats - NGetLnFrm = 26 << FormShift, - NSetLnFrm = 27 << FormShift, - NDupFrm = 28 << FormShift, - NLdStFrm = 29 << FormShift, - N1RegModImmFrm= 30 << FormShift, - N2RegFrm = 31 << FormShift, - NVCVTFrm = 32 << FormShift, - NVDupLnFrm = 33 << FormShift, - N2RegVShLFrm = 34 << FormShift, - N2RegVShRFrm = 35 << FormShift, - N3RegFrm = 36 << FormShift, - N3RegVShFrm = 37 << FormShift, - NVExtFrm = 38 << FormShift, - NVMulSLFrm = 39 << FormShift, - NVTBLFrm = 40 << FormShift, + NGetLnFrm = 27 << FormShift, + NSetLnFrm = 28 << FormShift, + NDupFrm = 29 << FormShift, + NLdStFrm = 30 << FormShift, + N1RegModImmFrm= 31 << FormShift, + N2RegFrm = 32 << FormShift, + NVCVTFrm = 33 << FormShift, + NVDupLnFrm = 34 << FormShift, + N2RegVShLFrm = 35 << FormShift, + N2RegVShRFrm = 36 << FormShift, + N3RegFrm = 37 << FormShift, + N3RegVShFrm = 38 << FormShift, + NVExtFrm = 39 << FormShift, + NVMulSLFrm = 40 << FormShift, + NVTBLFrm = 41 << FormShift, //===------------------------------------------------------------------===// // Misc flags. 
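To make the new AnalyzeCompare/ConvertToSetZeroFlag hooks above concrete: the peephole finds an add/sub that already computes the value a later "cmp reg, #0" re-tests, verifies that nothing in between writes CPSR, then switches the arithmetic op to its flag-setting form and deletes the compare. A toy model under simplified assumptions follows; the Instr struct and all names are invented for illustration, and the real code walks MachineInstr operands backwards from the compare rather than scanning a vector:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Each instruction is reduced to the two properties the peephole cares
    // about: does it write CPSR, and does it have an S-suffixed twin?
    struct Instr {
      const char *name;
      bool writesCPSR;   // e.g. another CMP, or an already S-suffixed op
      bool flagSettable; // ADDri/SUBri-like: has an ADDSri/SUBSri form
    };

    // Fold `cmp <reg>, #0` at index cmp into the instruction at index def
    // that produced <reg>. Legal only if no instruction in between
    // clobbers CPSR.
    static bool convertToSetZeroFlag(std::vector<Instr> &bb, size_t def,
                                     size_t cmp) {
      assert(def < cmp && cmp < bb.size());
      if (!bb[def].flagSettable)
        return false;
      for (size_t i = def + 1; i < cmp; ++i)
        if (bb[i].writesCPSR)
          return false;             // CPSR redefined in between; bail out.
      bb[def].writesCPSR = true;    // ADDri -> ADDSri, SUBri -> SUBSri.
      bb.erase(bb.begin() + cmp);   // The explicit compare is now dead.
      return true;
    }

    int main() {
      std::vector<Instr> bb = {{"subri", false, true},
                               {"ldr", false, false},
                               {"cmpzri", true, false}};
      if (convertToSetZeroFlag(bb, 0, 2))
        printf("folded: %zu instrs remain\n", bb.size()); // prints 2
      return 0;
    }

The conservatism is the point: refusing the fold whenever the scan sees any CPSR write keeps the transformation trivially safe at the cost of missing cases a dataflow-aware pass could handle.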
@@ -198,7 +200,7 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { - const ARMSubtarget& Subtarget; + const ARMSubtarget &Subtarget; protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); @@ -223,7 +225,7 @@ public: virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + bool AllowModify = false) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -262,12 +264,6 @@ public: /// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, @@ -341,6 +337,17 @@ public: unsigned NumInstrs) const { return NumInstrs && NumInstrs == 1; } + + /// AnalyzeCompare - For a comparison instruction, return the source register + /// in SrcReg and the value it compares against in CmpValue. Return true if + /// the comparison instruction can be analyzed. + virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + int &CmpValue) const; + + /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero". + virtual bool ConvertToSetZeroFlag(MachineInstr *Instr, + MachineInstr *CmpInstr) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 182bd99371457..eceafad63f17a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -40,13 +40,20 @@ #include "llvm/Support/CommandLine.h" namespace llvm { -cl::opt<bool> -ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), - cl::desc("Reuse repeated frame index values")); +static cl::opt<bool> +ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), + cl::desc("Force use of virtual base registers for stack load/store")); +static cl::opt<bool> +EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, + cl::desc("Enable pre-regalloc stack frame index allocation")); } using namespace llvm; +static cl::opt<bool> +EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), TII(tii), STI(sti), - FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? 
ARM::R7 : ARM::R11), + BasePtr(ARM::R6) { } const unsigned* @@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); - if (STI.isTargetDarwin() || hasFP(MF)) + Reserved.set(ARM::FPSCR); + if (hasFP(MF)) Reserved.set(FramePtr); + if (hasBasePointer(MF)) + Reserved.set(BasePtr); // Some targets reserve R9. if (STI.isR9Reserved()) Reserved.set(ARM::R9); @@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, case ARM::SP: case ARM::PC: return true; + case ARM::R6: + if (hasBasePointer(MF)) + return true; + break; case ARM::R7: case ARM::R11: - if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF))) + if (FramePtr == Reg && hasFP(MF)) return true; break; case ARM::R9: @@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPREven1, GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); @@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPROdd1, GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); @@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// or if frame pointer elimination is disabled. /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { + // Mac OS X requires FP not to be clobbered for backtracing purpose. + if (STI.isTargetDarwin()) + return true; + const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((DisableFramePointerElim(MF) && MFI->adjustsStack())|| + // Always eliminate non-leaf frame pointers. + return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } +bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + if (!EnableBasePointer) + return false; + + if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + return true; + + // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited + // negative range for ldr/str (255), and thumb1 is positive offsets only. + // It's going to be better to use the SP or Base Pointer instead. When there + // are variable sized objects, we can't reference off of the SP, so we + // reserve a Base Pointer. + if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) { + // Conservatively estimate whether the negative offset from the frame + // pointer will be sufficient to reach. If a function has a smallish + // frame, it's less likely to have lots of spills and callee saved + // space, so it's all more likely to be within range of the frame pointer. + // If it's wrong, the scavenger will still enable access to work, it just + // won't be optimal. 
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128) + return false; + return true; + } + + return false; +} + bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - return (RealignStack && - !AFI->isThumb1OnlyFunction() && - !MFI->hasVarSizedObjects()); + // We can't realign the stack if: + // 1. Dynamic stack realignment is explicitly disabled, + // 2. This is a Thumb1 function (it's not useful, so we don't bother), or + // 3. There are VLAs in the function and the base pointer is disabled. + return (RealignStack && !AFI->isThumb1OnlyFunction() && + (!MFI->hasVarSizedObjects() || EnableBasePointer)); } bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - return (RealignStack && - !AFI->isThumb1OnlyFunction() && - (MFI->getMaxAlignment() > StackAlign) && - !MFI->hasVarSizedObjects()); + bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); } bool ARMBaseRegisterInfo:: @@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) { /// instructions will require a scratch register during their expansion later. unsigned ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned Limit = (1 << 12) - 1; for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); @@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { Limit = std::min(Limit, ((1U << 8) - 1) * 4); break; case ARMII::AddrModeT2_i12: - if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1); + // i12 supports only positive offset so these will be converted to + // i8 opcodes. See llvm::rewriteT2FrameIndex. + if (hasFP(MF) && AFI->hasStackFrame()) + Limit = std::min(Limit, (1U << 8) - 1); break; case ARMII::AddrMode6: // Addressing mode 6 (load/store) instructions can't encode an @@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { return Limit; } +static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, + const ARMBaseInstrInfo &TII) { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += TII.GetInstSizeInBytes(I); + } + return FnSize; +} + void ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) MF.getRegInfo().setPhysRegUsed(ARM::LR); + // Spill the BasePtr if it's used. + if (hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(BasePtr); + // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. 
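
With this hunk the realignment query is split in two: canRealignStack says whether realignment is possible at all, and needsStackRealignment is simply "requested && possible". A standalone sketch of the two predicates over assumed boolean inputs:

  struct FrameState {
    bool realignOption;      // stack realignment enabled
    bool thumb1Only;         // realignment unsupported on Thumb1
    bool hasVLAs;            // variable sized objects present
    bool basePtrEnabled;     // -arm-use-base-pointer
    unsigned localMaxAlign;  // largest alignment demanded by a local
    unsigned stackAlign;     // ABI stack alignment
    bool alignAttr;          // function carries a stackalignment attribute
  };

  bool canRealign(const FrameState &f) {
    // VLAs are tolerable only if a base pointer can carry local accesses.
    return f.realignOption && !f.thumb1Only &&
           (!f.hasVLAs || f.basePtrEnabled);
  }

  bool needsRealign(const FrameState &f) {
    bool requested = f.localMaxAlign > f.stackAlign || f.alignAttr;
    return requested && canRealign(f);
  }
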
const unsigned *CSRegs = getCalleeSavedRegs(); @@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool ForceLRSpill = false; if (!LRSpilled && AFI->isThumb1OnlyFunction()) { - unsigned FnSize = TII.GetFunctionSizeInBytes(MF); + unsigned FnSize = GetFunctionSizeInBytes(MF, TII); // Force LR to be spilled if the Thumb function size is > 2048. This enables // use of BL to implement far jump. If it turns out that it's not needed // then the branch fix up path will undo it. @@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // slot of the previous FP. Also, if we have variable sized objects in the // function, stack slot references will often be negative, and some of // our instructions are positive-offset only, so conservatively consider - // that case to want a spill slot (or register) as well. + // that case to want a spill slot (or register) as well. Similarly, if + // the function adjusts the stack pointer during execution and the + // adjustments aren't already part of our stack size estimate, our offset + // calculations may be off, so be conservative. // FIXME: We could add logic to be more precise about negative offsets // and which instructions will need a scratch register for them. Is it // worth the effort and added fragility? bool BigStack = - (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >= - estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects(); + (RS && + (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + estimateRSStackSizeLimit(MF))) + || MFI->hasVarSizedObjects() + || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { @@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, ExtraCSSpill = true; } - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { + if (hasFP(MF)) { MF.getRegInfo().setPhysRegUsed(FramePtr); NumGPRSpills++; } @@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned +unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - if (STI.isTargetDarwin() || hasFP(MF)) + if (hasFP(MF)) return FramePtr; return ARM::SP; } +// Provide a base+offset reference to an FI slot for debug info. It's the +// same as what we use for resolving the code-gen references for now. +// FIXME: This can go wrong when references are SP-relative and simple call +// frames aren't used. 
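
ResolveFrameIndexReference, which follows, picks the register used to address a frame index in a fixed priority order. A simplified standalone sketch of that order (the Thumb2 negative-range check and the pick-the-closer-of-SP/FP heuristic are omitted):

  enum Base { SP, FP, BP };

  Base pickBase(bool isFixed,    // fixed (incoming argument) object?
                bool realigning, // dynamic stack realignment in effect
                bool hasVLAs,    // variable sized objects present
                bool haveFP,     // frame pointer available
                bool haveBP) {   // base pointer reserved
    if (realigning)              // SP moves when realigned
      return isFixed ? FP : (hasVLAs ? BP : SP);
    if (haveFP) {
      if (isFixed || (hasVLAs && !haveBP))
        return FP;               // fixed objects, or VLAs with no BP
      if (hasVLAs)
        return BP;               // SP is unreliable below the VLAs
    }
    return haveBP ? BP : SP;
  }
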
int ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + return ResolveFrameIndexReference(MF, FI, FrameReg, 0); +} + +int +ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg, + int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); bool isFixed = MFI->isFixedObjectIndex(FI); FrameReg = ARM::SP; + Offset += SPAdj; if (AFI->isGPRCalleeSavedArea1Frame(FI)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); + return Offset - AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FI)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); + return Offset - AFI->getGPRCalleeSavedArea2Offset(); else if (AFI->isDPRCalleeSavedAreaFrame(FI)) - Offset -= AFI->getDPRCalleeSavedAreaOffset(); - else if (needsStackRealignment(MF)) { - // When dynamically realigning the stack, use the frame pointer for - // parameters, and the stack pointer for locals. + return Offset - AFI->getDPRCalleeSavedAreaOffset(); + + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack/base pointer for locals. + if (needsStackRealignment(MF)) { assert (hasFP(MF) && "dynamic stack realignment without a FP!"); if (isFixed) { FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + Offset = FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(hasBasePointer(MF) && + "VLAs and dynamic stack alignment, but missing base pointer!"); + FrameReg = BasePtr; } - } else if (hasFP(MF) && AFI->hasStackFrame()) { - if (isFixed || MFI->hasVarSizedObjects()) { - // Use frame pointer to reference fixed objects unless this is a - // frameless function. + return Offset; + } + + // If there is a frame pointer, use it when we can. + if (hasFP(MF) && AFI->hasStackFrame()) { + // Use frame pointer to reference fixed objects. Use it for locals if + // there are VLAs (and thus the SP isn't reliable as a base). + if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) { FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + return FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(hasBasePointer(MF) && "missing base pointer!"); + // Use the base register since we have it. + FrameReg = BasePtr; } else if (AFI->isThumb2Function()) { - // In Thumb2 mode, the negative offset is very limited. - int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + // In Thumb2 mode, the negative offset is very limited. Try to avoid + // out of range references. if (FPOffset >= -255 && FPOffset < 0) { FrameReg = getFrameRegister(MF); - Offset = FPOffset; + return FPOffset; } + } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { + // Otherwise, use SP or FP, whichever is closer to the stack slot. + FrameReg = getFrameRegister(MF); + return FPOffset; } } + // Use the base pointer if we have one. + if (hasBasePointer(MF)) + FrameReg = BasePtr; return Offset; } - int ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { @@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R5: return ARM::R4; case ARM::R7: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 
0 : ARM::R6; case ARM::R9: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; case ARM::R11: @@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R4: return ARM::R5; case ARM::R6: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 0 : ARM::R7; case ARM::R8: return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; case ARM::R10: @@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } +bool ARMBaseRegisterInfo:: +requiresVirtualBaseRegisters(const MachineFunction &MF) const { + return EnableLocalStackAlloc; +} + // hasReservedCallFrame - Under normal circumstances, when a frame pointer is // not required, we reserve argument space for call sites in the function // immediately on entry to the current function. This eliminates the need for // add/sub sp brackets around call sites. Returns true if the call frame is // included as part of the stack frame. bool ARMBaseRegisterInfo:: -hasReservedCallFrame(MachineFunction &MF) const { +hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const { // is not sufficient here since we still may reference some objects via SP // even when FP is available in Thumb2 mode. bool ARMBaseRegisterInfo:: -canSimplifyCallFramePseudos(MachineFunction &MF) const { +canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } @@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +int64_t ARMBaseRegisterInfo:: +getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + int64_t InstrOffs = 0; + int Scale = 1; + unsigned ImmIdx = 0; + switch (AddrMode) { + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i12: + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign, consider the appropriate instruction + InstrOffs = MI->getOperand(Idx+1).getImm(); + Scale = 1; + break; + case ARMII::AddrMode5: { + // VFP address mode. + const MachineOperand &OffOp = MI->getOperand(Idx+1); + InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); + if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + Scale = 4; + break; + } + case ARMII::AddrMode2: { + ImmIdx = Idx+2; + InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + break; + } + case ARMII::AddrMode3: { + ImmIdx = Idx+2; + InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs = -InstrOffs; + break; + } + case ARMII::AddrModeT1_s: { + ImmIdx = Idx+1; + InstrOffs = MI->getOperand(ImmIdx).getImm(); + Scale = 4; + break; + } + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + return InstrOffs * Scale; +} + +/// needsFrameBaseReg - Returns true if the instruction's frame index +/// reference would be better served by a base register other than FP +/// or SP.
Used by LocalStackFrameAllocation to determine which frame index +/// references it should create new base registers for. +bool ARMBaseRegisterInfo:: +needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) { + assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); + } + + // It's the load/store FI references that cause issues, as it can be difficult + // to materialize the offset if it won't fit in the literal field. Estimate + // based on the size of the local frame and some conservative assumptions + // about the rest of the stack frame (note, this is pre-regalloc, so + // we don't know everything for certain yet) whether this offset is likely + // to be out of range of the immediate. Return true if so. + + // We only generate virtual base registers for loads and stores, so + // return false for everything else. + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case ARM::LDR: case ARM::LDRH: case ARM::LDRB: + case ARM::STR: case ARM::STRH: case ARM::STRB: + case ARM::t2LDRi12: case ARM::t2LDRi8: + case ARM::t2STRi12: case ARM::t2STRi8: + case ARM::VLDRS: case ARM::VLDRD: + case ARM::VSTRS: case ARM::VSTRD: + case ARM::tSTRspi: case ARM::tLDRspi: + if (ForceAllBaseRegAlloc) + return true; + break; + default: + return false; + } + + // Without a virtual base register, if the function has variable sized + // objects, all fixed-size local references will be via the frame pointer, + // Approximate the offset and see if it's legal for the instruction. + // Note that the incoming offset is based on the SP value at function entry, + // so it'll be negative. + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Estimate an offset from the frame pointer. + // Conservatively assume all callee-saved registers get pushed. R4-R6 + // will be earlier than the FP, so we ignore those. + // R7, LR + int64_t FPOffset = Offset - 8; + // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15 + if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction()) + FPOffset -= 80; + // Estimate an offset from the stack pointer. + // The incoming offset is relating to the SP at the start of the function, + // but when we access the local it'll be relative to the SP after local + // allocation, so adjust our SP-relative offset by that allocation size. + Offset = -Offset; + Offset += MFI->getLocalFrameSize(); + // Assume that we'll have at least some spill slots allocated. + // FIXME: This is a total SWAG number. We should run some statistics + // and pick a real one. + Offset += 128; // 128 bytes of spill slots + + // If there is a frame pointer, try using it. + // The FP is only available if there is no dynamic realignment. We + // don't know for sure yet whether we'll need that, so we guess based + // on whether there are any local variables that would trigger it. + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + if (hasFP(MF) && + !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { + if (isFrameOffsetLegal(MI, FPOffset)) + return false; + } + // If we can reference via the stack pointer, try that. + // FIXME: This (and the code that resolves the references) can be improved + // to only disallow SP relative references in the live range of + // the VLA(s). In practice, it's unclear how much difference that + // would make, but it may be worth doing. 
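
The estimates above reduce to two formulas: an FP-relative guess assuming all callee-saved registers get pushed (R7 and LR, plus R8-R11 and D8-D15 outside Thumb1), and an SP-relative guess that flips the entry-relative offset and adds the local frame plus an assumed 128 bytes of spill slots. A standalone sketch of both:

  #include <cstdint>

  // FP-relative estimate: R7/LR (8 bytes) precede the locals; ARM and
  // Thumb2 may additionally save R8-R11 and D8-D15 (80 bytes).
  int64_t estFPOffset(int64_t spEntryOffset, bool thumb1Only) {
    int64_t off = spEntryOffset - 8;
    if (!thumb1Only)
      off -= 80;
    return off;
  }

  // SP-relative estimate: the incoming offset is relative to SP at entry
  // (hence negative); flip it, then add the local allocation and the
  // guessed 128 bytes of spill slots.
  int64_t estSPOffset(int64_t spEntryOffset, int64_t localFrameSize) {
    return -spEntryOffset + localFrameSize + 128;
  }
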
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset)) + return false; + + // The offset likely isn't legal, we want to allocate a virtual base register. + return true; +} + +/// materializeFrameBaseRegister - Insert defining instruction(s) for +/// BaseReg to be a pointer to FrameIdx before insertion point I. +void ARMBaseRegisterInfo:: +materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg, + int FrameIdx, int64_t Offset) const { + ARMFunctionInfo *AFI = + I->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : + (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + + MachineInstrBuilder MIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); + if (!AFI->isThumb1OnlyFunction()) + AddDefaultCC(AddDefaultPred(MIB)); +} + +void +ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int Off = Offset; // ARM doesn't need the general 64-bit offsets + unsigned i = 0; + + assert(!AFI->isThumb1OnlyFunction() && + "This resolveFrameIndex does not support Thumb1!"); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + bool Done = false; + if (!AFI->isThumbFunction()) + Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII); + else { + assert(AFI->isThumb2Function()); + Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII); + } + assert (Done && "Unable to resolve frame index!"); +} + +bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + unsigned i = 0; + + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); + } + + // AddrMode4 and AddrMode6 cannot handle any offset. + if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) + return Offset == 0; + + unsigned NumBits = 0; + unsigned Scale = 1; + bool isSigned = true; + switch (AddrMode) { + case ARMII::AddrModeT2_i8: + case ARMII::AddrModeT2_i12: + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign, consider the appropriate instruction + Scale = 1; + if (Offset < 0) { + NumBits = 8; + Offset = -Offset; + } else { + NumBits = 12; + } + break; + case ARMII::AddrMode5: + // VFP address mode. + NumBits = 8; + Scale = 4; + break; + case ARMII::AddrMode2: + NumBits = 12; + break; + case ARMII::AddrMode3: + NumBits = 8; + break; + case ARMII::AddrModeT1_s: + NumBits = 5; + Scale = 4; + isSigned = false; + break; + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + Offset += getFrameIndexInstrOffset(MI, i); + // Make sure the offset is encodable for instructions that scale the + // immediate. 
+ if ((Offset & (Scale-1)) != 0) + return false; + + if (isSigned && Offset < 0) + Offset = -Offset; + + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) + return true; + + return false; +} + +void ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); @@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned FrameReg; - int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); - if (FrameReg != ARM::SP) - SPAdj = 0; - Offset += SPAdj; + int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; + return; } // Modify MI as necessary to handle as much of 'Offset' as possible @@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); } if (Done) - return 0; + return; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is @@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); - if (Value) { - Value->first = FrameReg; // use the frame register as a kind indicator - Value->second = Offset; - } if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); @@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - if (!ReuseFrameIndexVals) - ScratchReg = 0; } - return ScratchReg; } /// Move iterator past the next bunch of callee save load / store ops for @@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const { // Otherwise, if this is not Darwin, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. - if (STI.isTargetDarwin() || hasFP(MF)) { + bool HasFP = hasFP(MF); + if (HasFP) { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) @@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - if (STI.isTargetDarwin() || hasFP(MF)) + if (HasFP) AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); @@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. 
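
The encodability test that isFrameOffsetLegal finishes just above is plain arithmetic over the NumBits/Scale/isSigned triple selected per addressing mode. A standalone sketch:

  #include <cstdint>

  bool encodable(int64_t off, unsigned numBits, unsigned scale, bool sign) {
    if (off % scale != 0)
      return false;              // immediates are stored pre-scaled
    if (sign && off < 0)
      off = -off;                // direction lives in the opcode / U bit
    uint64_t limit = ((1ull << numBits) - 1) * scale;
    return off >= 0 && static_cast<uint64_t>(off) <= limit;
  }

For AddrMode5 (numBits 8, scale 4, signed) this accepts -1020..1020 in multiples of 4, the usual VFP load/store range.
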
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + if (HasFP) + AFI->setShouldRestoreSPFromFP(true); } if (STI.isTargetELF() && hasFP(MF)) { MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); + AFI->setShouldRestoreSPFromFP(true); } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // If we need dynamic stack realignment, do it here. + // If we need dynamic stack realignment, do it here. Be paranoid and make + // sure if we also have VLAs, we have a base pointer for frame access. if (needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); @@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4, RegState::Kill); } + + AFI->setShouldRestoreSPFromFP(true); + } + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (hasBasePointer(MF)) { + if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr) + .addReg(ARM::SP) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr) + .addReg(ARM::SP); } + + // If the frame has variable sized objects then the epilogue must restore + // the sp from fp. + if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects()) + AFI->setShouldRestoreSPFromFP(true); } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { @@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - bool HasFP = hasFP(MF); - if ((STI.isTargetDarwin() && NumBytes) || HasFP) { + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (HasFP || - AFI->getGPRCalleeSavedArea2Size() || - AFI->getDPRCalleeSavedAreaSize() || - AFI->getDPRCalleeSavedAreaOffset()) { - if (NumBytes) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - } else { - // Thumb2 or ARM. - if (isARM) - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(FramePtr); - } + if (NumBytes) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + } else { + // Thumb2 or ARM. 
+ if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) + .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(FramePtr); } } else if (NumBytes) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); @@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi) { - BuildMI(MBB, MBBI, dl, + BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { } else if (RetOpcode == ARM::TCRETURNriND) { BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); - } + } MachineInstr *NewMI = prior(MBBI); for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index f7ee0d5cc66d0..fa2eb6c10498e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) { } } -struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { +class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: const ARMBaseInstrInfo &TII; const ARMSubtarget &STI; @@ -52,6 +52,11 @@ protected: /// FramePtr - ARM physical register used as frame ptr. unsigned FramePtr; + /// BasePtr - ARM physical register used as a base ptr in complex stack + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to + /// variable size stack objects. + unsigned BasePtr; + // Can be only subclassed. explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); @@ -102,9 +107,18 @@ public: MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; + int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const; + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + void materializeFrameBaseRegister(MachineBasicBlock::iterator I, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; + bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; bool cannotEliminateFrame(const MachineFunction &MF) const; @@ -116,6 +130,8 @@ public: unsigned getFrameRegister(const MachineFunction &MF) const; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; + int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; // Exception handling queries. 
@@ -144,16 +160,17 @@ public: virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; - virtual bool hasReservedCallFrame(MachineFunction &MF) const; - virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; + virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; + + virtual bool hasReservedCallFrame(const MachineFunction &MF) const; + virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 8fdb07f81626a..293e32aa5376e 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -1,4 +1,4 @@ -//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ "ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, - CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>, + CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 8>>, CCIfType<[v2f64], CCAssignToStack<16, 8>> diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 7895cb0719229..b1a702f90cfc3 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -65,7 +65,7 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), + : MachineFunctionPass(ID), JTI(0), II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), @@ -124,6 +124,8 @@ namespace { void emitMiscArithInstruction(const MachineInstr &MI); + void emitSaturateInstruction(const MachineInstr &MI); + void emitBranchInstruction(const MachineInstr &MI); void emitInlineJumpTable(unsigned JTIndex); @@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::ArithMiscFrm: emitMiscArithInstruction(MI); break; + case ARMII::SatFrm: + emitSaturateInstruction(MI); + break; case ARMII::BrFrm: emitBranchInstruction(MI); break; @@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + case ARM::BX: + case ARM::BMOVPCRX: + case ARM::BXr9: + case ARM::BMOVPCRXr9: { + // First emit mov lr, pc + unsigned Binary = 0x01a0e00f; + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + emitWordLE(Binary); + + // and then emit the branch. + emitMiscBranchInstruction(MI); + break; + } case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. 
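
The hard-coded 0x01a0e00f emitted above for the BX/BMOVPCRX pseudos is "mov lr, pc" with an empty condition field; the predicate (AL = 0xE) is OR'd in afterwards, yielding 0xE1A0E00F. A standalone sketch of the composition, assuming the usual ARM condition field at bits 31:28:

  #include <cassert>
  #include <cstdint>

  // cond | opcode=MOV | Rd=14 (LR) | Rm=15 (PC), no shift
  uint32_t movLrPc(uint32_t cond) {
    return 0x01a0e00fu | (cond << 28);
  }

  int main() {
    assert(movLrPc(0xEu) == 0xE1A0E00Fu);  // mov lr, pc (always-execute)
  }
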
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } break; } - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; @@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { // Encode shift_imm. unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); + if (TID.Opcode == ARM::PKHTB) { + assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); + if (ShiftAmt == 32) + ShiftAmt = 0; + } assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); Binary |= ShiftAmt << ARMII::ShiftShift; emitWordLE(Binary); } +void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGen. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode saturate bit position. + unsigned Pos = MI.getOperand(1).getImm(); + if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16) + Pos -= 1; + assert((Pos < 16 || (Pos < 32 && + TID.Opcode != ARM::SSAT16 && + TID.Opcode != ARM::USAT16)) && + "saturate bit position out of range"); + Binary |= Pos << 16; + + // Encode Rm + Binary |= getMachineOpValue(MI, 2); + + // Encode shift_imm. + if (TID.getNumOperands() == 4) { + unsigned ShiftOp = MI.getOperand(3).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + if (Opc == ARM_AM::asr) + Binary |= (1 << 6); + unsigned ShiftAmt = MI.getOperand(3).getImm(); + if (ShiftAmt == 32 && Opc == ARM_AM::asr) + ShiftAmt = 0; + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + } + + emitWordLE(Binary); +} + void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); @@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { // Set addressing mode by modifying bits U(23) and P(24) const MachineOperand &MO = MI.getOperand(OpIdx++); - Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) if (IsUpdating) @@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { // First register is encoded in Dd. Binary |= encodeVFPRd(MI, OpIdx+2); - // Number of registers are encoded in offset field. + // Count the number of registers. unsigned NumRegs = 1; for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 65a3da6f1617c..60e923bd2c385 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -18,9 +18,9 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" +#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -165,7 +165,7 @@ namespace { /// HasInlineAsm - True if the function contains inline assembly. 
bool HasInlineAsm; - const TargetInstrInfo *TII; + const ARMInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -173,7 +173,7 @@ namespace { bool isThumb2; public: static char ID; - ARMConstantIslands() : MachineFunctionPass(&ID) {} + ARMConstantIslands() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() { bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { MachineConstantPool &MCP = *MF.getConstantPool(); - TII = MF.getTarget().getInstrInfo(); + TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo(); AFI = MF.getInfo<ARMFunctionInfo>(); STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); @@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // constant pool users. InitialFunctionScan(MF, CPEMIs); CPEMIs.clear(); + DEBUG(dumpBBs()); + /// Remove dead constant pool entries. RemoveUnusedCPEntries(); @@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } // Shrink 32-bit Thumb2 branch, load, and store instructions. - if (isThumb2) + if (isThumb2 && !STI->prefers32BitThumb()) MadeChange |= OptimizeThumb2Instructions(MF); // After a while, this might be made debug-only, but it is not expensive. @@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); + DEBUG(errs() << '\n'; dumpBBs()); + BBSizes.clear(); BBOffsets.clear(); WaterList.clear(); @@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::tBR_JTr: // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. + // tBR_JTr expands to a mov pc followed by .align 2 and then the jump + // table entries. So this code checks whether offset of tBR_JTr + 2 + // is aligned. That is held in Offset+MBBSize, which already has + // 2 added in for the size of the mov pc instruction. MF.EnsureAlignment(2U); if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. @@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); - // Figure out how large the first NewMBB is. (It cannot - // contain a constpool_entry or tablejump.) - unsigned NewBBSize = 0; - for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); - I != E; ++I) - NewBBSize += TII->GetInstSizeInBytes(I); - unsigned OrigBBI = OrigBB->getNumber(); unsigned NewBBI = NewBB->getNumber(); - // Set the size of NewBB in BBSizes. - BBSizes[NewBBI] = NewBBSize; - // We removed instructions from UserMBB, subtract that off from its size. - // Add 2 or 4 to the block to count the unconditional branch we added to it. int delta = isThumb1 ? 2 : 4; - BBSizes[OrigBBI] -= NewBBSize - delta; + + // Figure out how large the OrigBB is. As the first half of the original + // block, it cannot contain a tablejump. The size includes + // the new jump we added. (It should be possible to do this without + // recounting everything, but it's very confusing, and this is rarely + // executed.) 
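
The alignment bookkeeping described above, and applied again in the block-split logic that follows, comes down to one rule: tBR_JTr is a 2-byte "mov pc" and the jump table after it must be 4-byte aligned, so padding is needed exactly when the instruction itself starts on a 4-byte boundary. That is also why inserting one 2-byte instruction before it flips the need. A standalone sketch:

  #include <cassert>

  // Bytes of padding between a tBR_JTr at 'off' and its jump table.
  unsigned jtPad(unsigned off) {
    return (off + 2) % 4 == 0 ? 0 : 2;
  }

  int main() {
    assert(jtPad(100) == 2);  // table would start at 102: pad to 104
    assert(jtPad(102) == 0);  // 102 + 2 = 104, already aligned
  }
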
+ unsigned OrigBBSize = 0; + for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end(); + I != E; ++I) + OrigBBSize += TII->GetInstSizeInBytes(I); + BBSizes[OrigBBI] = OrigBBSize; // ...and adjust BBOffsets for NewBB accordingly. BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + // Figure out how large the NewMBB is. As the second half of the original + // block, it may contain a tablejump. + unsigned NewBBSize = 0; + for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); + I != E; ++I) + NewBBSize += TII->GetInstSizeInBytes(I); + // Set the size of NewBB in BBSizes. It does not include any padding now. + BBSizes[NewBBI] = NewBBSize; + + MachineInstr* ThumbJTMI = prior(NewBB->end()); + if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { + // We've added another 2-byte instruction before this tablejump, which + // means we will always need padding if we didn't before, and vice versa. + + // The original offset of the jump instruction was: + unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta; + if (OrigOffset%4 == 0) { + // We had padding before and now we don't. No net change in code size. + delta = 0; + } else { + // We didn't have padding before and now we do. + BBSizes[NewBBI] += 2; + delta = 4; + } + } + // All BBOffsets following these blocks must be modified. - AdjustBBOffsetsAfter(NewBB, delta); + if (delta) + AdjustBBOffsetsAfter(NewBB, delta); return NewBB; } @@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, } // Thumb1 jump tables require padding. They should be at the end; // following unconditional branches are removed by AnalyzeBranch. + // tBR_JTr expands to a mov pc followed by .align 2 and then the jump + // table entries. So this code checks whether offset of tBR_JTr + // is aligned; if it is, the offset of the jump table following the + // instruction will not be aligned, and we need padding. MachineInstr *ThumbJTMI = prior(MBB->end()); if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); @@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, MachineBasicBlock::iterator MI = UserMI; ++MI; unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; if (!OffsetIsInRange(Offset, EndInsertOffset, U.MaxDisp, U.NegOk, U.IsSoImm)) { @@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); CPUIndex++; } + + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; } + DEBUG(errs() << "Split in middle of big block\n"); - NewMBB = SplitBlockBeforeInstr(prior(MI)); + --MI; + + // Avoid splitting an IT block. 
+ if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; + } + NewMBB = SplitBlockBeforeInstr(MI); } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9c62597b4323b..fc2e3c3fadaea 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,14 +19,21 @@ #include "ARMBaseInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" - +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { class ARMExpandPseudo : public MachineFunctionPass { + // Constants for register spacing in NEON load/store instructions. + enum NEONRegSpacing { + SingleSpc, + EvenDblSpc, + OddDblSpc + }; + public: static char ID; - ARMExpandPseudo() : MachineFunctionPass(&ID) {} + ARMExpandPseudo() : MachineFunctionPass(ID) {} const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -41,6 +48,10 @@ namespace { void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); + void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); + void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc, + bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); }; char ARMExpandPseudo::ID = 0; } @@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, } } +/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register +/// operands to real VLD instructions with D register operands. +void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + D2 = TRI->getSubReg(DstReg, ARM::dsub_2); + D3 = TRI->getSubReg(DstReg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + D1 = TRI->getSubReg(DstReg, ARM::dsub_2); + D2 = TRI->getSubReg(DstReg, ARM::dsub_4); + D3 = TRI->getSubReg(DstReg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing for VLD"); + D0 = TRI->getSubReg(DstReg, ARM::dsub_1); + D1 = TRI->getSubReg(DstReg, ARM::dsub_3); + D2 = TRI->getSubReg(DstReg, ARM::dsub_5); + D3 = TRI->getSubReg(DstReg, ARM::dsub_7); + } + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + + if (hasWriteBack) { + bool WBIsDead = MI.getOperand(OpIdx).isDead(); + unsigned WBReg = MI.getOperand(OpIdx++).getReg(); + MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead)); + } + // Copy the addrmode6 operands. 
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); + MIB.addImm(MI.getOperand(OpIdx++).getImm()); + if (hasWriteBack) { + // Copy the am6offset operand. + bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); + } + + MIB = AddDefaultPred(MIB); + TransferImpOps(MI, MIB, MIB); + // For an instruction writing the odd subregs, add an implicit use of the + // super-register because the even subregs were loaded separately. + if (RegSpc == OddDblSpc) + MIB.addReg(DstReg, RegState::Implicit); + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + MI.eraseFromParent(); +} + +/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register +/// operands to real VST instructions with D register operands. +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool hasWriteBack, + NEONRegSpacing RegSpc, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + if (hasWriteBack) { + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); + } + // Copy the addrmode6 operands. + bool AddrIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); + MIB.addImm(MI.getOperand(OpIdx++).getImm()); + if (hasWriteBack) { + // Copy the am6offset operand. + bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); + MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); + } + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx).getReg(); + unsigned D0, D1, D2, D3; + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_2); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_2); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_4); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing for VST"); + D0 = TRI->getSubReg(SrcReg, ARM::dsub_1); + D1 = TRI->getSubReg(SrcReg, ARM::dsub_3); + D2 = TRI->getSubReg(SrcReg, ARM::dsub_5); + D3 = TRI->getSubReg(SrcReg, ARM::dsub_7); + } + + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + MIB = AddDefaultPred(MIB); + TransferImpOps(MI, MIB, MIB); + if (SrcIsKill) + // Add an implicit kill for the super-reg. 
+ (*MIB).addRegisterKilled(SrcReg, TRI, true); + MI.eraseFromParent(); +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + bool ModifiedOp = true; unsigned Opcode = MI.getOpcode(); switch (Opcode) { - default: break; + default: + ModifiedOp = false; + break; + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) @@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - Modified = true; break; } + case ARM::MOVi32imm: case ARM::t2MOVi32imm: { unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); @@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { const MachineOperand &MO = MI.getOperand(1); MachineInstrBuilder LO16, HI16; - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::MOVi32imm ? + ARM::MOVi16 : ARM::t2MOVi16), DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Opcode == ARM::MOVi32imm ? + ARM::MOVTi16 : ARM::t2MOVTi16)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(DstReg); @@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { HI16.addImm(Pred).addReg(PredReg); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); - Modified = true; break; } @@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); - Modified = true; } + + case ARM::VLD1q8Pseudo: + ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break; + case ARM::VLD1q16Pseudo: + ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break; + case ARM::VLD1q32Pseudo: + ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break; + case ARM::VLD1q64Pseudo: + ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break; + case ARM::VLD1q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break; + case ARM::VLD1q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break; + case ARM::VLD1q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break; + case ARM::VLD1q64Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break; + + case ARM::VLD2d8Pseudo: + ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break; + case ARM::VLD2d16Pseudo: + ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break; + case ARM::VLD2d32Pseudo: + ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break; + case ARM::VLD2q8Pseudo: + ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break; + case ARM::VLD2q16Pseudo: + ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break; + case ARM::VLD2q32Pseudo: + ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break; + case ARM::VLD2d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break; + case ARM::VLD2d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break; + case ARM::VLD2d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break; + case ARM::VLD2q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break; + case ARM::VLD2q16Pseudo_UPD: + ExpandVLD(MBBI, 
ARM::VLD2q16, true, SingleSpc, 4); break; + case ARM::VLD2q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break; + + case ARM::VLD3d8Pseudo: + ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo: + ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo: + ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break; + case ARM::VLD1d64TPseudo: + ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break; + case ARM::VLD3d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break; + case ARM::VLD3d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break; + case ARM::VLD1d64TPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VLD3q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VLD3q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VLD4d8Pseudo: + ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo: + ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo: + ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break; + case ARM::VLD1d64QPseudo: + ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break; + case ARM::VLD4d8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break; + case ARM::VLD4d32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break; + case ARM::VLD1d64QPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VLD4q8Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q16Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q32Pseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q8oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q16oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break; + case ARM::VLD4q32oddPseudo_UPD: + ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break; + + case ARM::VST1q8Pseudo: + ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; + case ARM::VST1q16Pseudo: + ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break; + case ARM::VST1q32Pseudo: + ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break; + case ARM::VST1q64Pseudo: + ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break; + case ARM::VST1q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break; + case ARM::VST1q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break; + case ARM::VST1q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break; + case ARM::VST1q64Pseudo_UPD: + ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break; + + case ARM::VST2d8Pseudo: + ExpandVST(MBBI, ARM::VST2d8, false, 
SingleSpc, 2); break; + case ARM::VST2d16Pseudo: + ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break; + case ARM::VST2d32Pseudo: + ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break; + case ARM::VST2q8Pseudo: + ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break; + case ARM::VST2q16Pseudo: + ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break; + case ARM::VST2q32Pseudo: + ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break; + case ARM::VST2d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break; + case ARM::VST2d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break; + case ARM::VST2d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break; + case ARM::VST2q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break; + case ARM::VST2q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break; + case ARM::VST2q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break; + + case ARM::VST3d8Pseudo: + ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; + case ARM::VST3d16Pseudo: + ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break; + case ARM::VST3d32Pseudo: + ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break; + case ARM::VST1d64TPseudo: + ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break; + case ARM::VST3d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break; + case ARM::VST3d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break; + case ARM::VST3d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break; + case ARM::VST1d64TPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break; + case ARM::VST3q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break; + case ARM::VST3q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break; + + case ARM::VST4d8Pseudo: + ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break; + case ARM::VST4d16Pseudo: + ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break; + case ARM::VST4d32Pseudo: + ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break; + case ARM::VST1d64QPseudo: + ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break; + case ARM::VST4d8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break; + case ARM::VST4d16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break; + case ARM::VST4d32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break; + case ARM::VST1d64QPseudo_UPD: + ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break; + case ARM::VST4q8Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q16Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q32Pseudo_UPD: + ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q8oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break; + case ARM::VST4q16oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break; + 
case ARM::VST4q32oddPseudo_UPD: + ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break; } + + if (ModifiedOp) + Modified = true; MBBI = NMBBI; } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp new file mode 100644 index 0000000000000..4892eae95833b --- /dev/null +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -0,0 +1,665 @@ +//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM-specific support for the FastISel class. Some +// of the target-specific code is generated by tablegen in the file +// ARMGenFastISel.inc, which is #included here. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMTargetMachine.h" +#include "ARMSubtarget.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +static cl::opt<bool> +EnableARMFastISel("arm-fast-isel", + cl::desc("Turn on experimental ARM fast-isel support"), + cl::init(false), cl::Hidden); + +namespace { + +class ARMFastISel : public FastISel { + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + const TargetMachine &TM; + const TargetInstrInfo &TII; + const TargetLowering &TLI; + const ARMFunctionInfo *AFI; + + // Convenience variable to avoid checking all the time. + bool isThumb; + + public: + explicit ARMFastISel(FunctionLoweringInfo &funcInfo) + : FastISel(funcInfo), + TM(funcInfo.MF->getTarget()), + TII(*TM.getInstrInfo()), + TLI(*TM.getTargetLowering()) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); + isThumb = AFI->isThumbFunction(); + } + + // Code from FastISel.cpp. 
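+  // These FastEmitInst_* overloads shadow the generic emitters in
+  // FastISel.cpp: the bodies below are near-copies that route every
+  // emitted instruction through AddOptionalDefs, so the predicate and
+  // optional-CC operands that ARM instructions expect get filled in.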
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); + + // Backend specific FastISel code. + virtual bool TargetSelectInstruction(const Instruction *I); + virtual unsigned TargetMaterializeConstant(const Constant *C); + + #include "ARMGenFastISel.inc" + + // Instruction selection routines. + virtual bool ARMSelectLoad(const Instruction *I); + virtual bool ARMSelectStore(const Instruction *I); + virtual bool ARMSelectBranch(const Instruction *I); + + // Utility routines. + private: + bool isTypeLegal(const Type *Ty, EVT &VT); + bool isLoadTypeLegal(const Type *Ty, EVT &VT); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset); + bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset); + bool ARMLoadAlloca(const Instruction *I); + bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg); + bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset); + bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg); + + bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); + const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); +}; + +} // end anonymous namespace + +// #include "ARMGenCallingConv.inc" + +// DefinesOptionalPredicate - This is different from DefinesPredicate in that +// we don't care about implicit defs here, just places we'll need to add a +// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. +bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.hasOptionalDef()) + return false; + + // Look to see if our OptionalDef is defining CPSR or CCR. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + if (MO.getReg() == ARM::CPSR) + *CPSR = true; + } + return true; +} + +// If the machine is predicable go ahead and add the predicate operands, if +// it needs default CC operands add those. +const MachineInstrBuilder & +ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { + MachineInstr *MI = &*MIB; + + // Do we use a predicate? + if (TII.isPredicable(MI)) + AddDefaultPred(MIB); + + // Do we optionally set a predicate? Preds is size > 0 iff the predicate + // defines CPSR. All other OptionalDefines in ARM are the CCR register. 
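+  // (AddDefaultT1CC marks CPSR as a def for Thumb1-style instructions,
+  // while AddDefaultCC appends a zero cc_out operand, i.e. "don't set
+  // flags".)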
+ bool CPSR = false; + if (DefinesOptionalPredicate(MI, &CPSR)) { + if (CPSR) + AddDefaultT1CC(MIB); + else + AddDefaultCC(MIB); + } + return MIB; +} + +unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, 
bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx)); + return ResultReg; +} + +unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { + EVT VT = TLI.getValueType(C->getType(), true); + + // Only handle simple types. + if (!VT.isSimple()) return 0; + + // TODO: This should be safe for fp because they're just bits from the + // Constant. + // TODO: Theoretically we could materialize fp constants with instructions + // from VFP3. + + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(C->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(C->getType()); + } + unsigned Idx = MCP.getConstantPoolIndex(C, Align); + + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + // Different addressing modes between ARM/Thumb2 for constant pool loads. + if (isThumb) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::t2LDRpci)) + .addReg(DestReg).addConstantPoolIndex(Idx)); + else + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::LDRcp)) + .addReg(DestReg).addConstantPoolIndex(Idx) + .addReg(0).addImm(0)); + + return DestReg; +} + +bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { + VT = TLI.getValueType(Ty, true); + + // Only handle simple types. + if (VT == MVT::Other || !VT.isSimple()) return false; + + // Handle all legal types, i.e. a register that will directly hold this + // value. + return TLI.isTypeLegal(VT); +} + +bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) { + if (isTypeLegal(Ty, VT)) return true; + + // If this is a type than can be sign or zero-extended to a basic operation + // go ahead and accept it now. + if (VT == MVT::i8 || VT == MVT::i16) + return true; + + return false; +} + +// Computes the Reg+Offset to get to an object. 
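+// The decomposition is deliberately simple: anything that is not a
+// frame-index alloca gets materialized into a base register, and any
+// non-zero offset is folded into that register up front (via
+// emitARMRegPlusImmediate / emitT2RegPlusImmediate) instead of being
+// encoded in the load/store addressing mode.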
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, + int &Offset) { + // Some boilerplate from the X86 FastISel. + const User *U = NULL; + unsigned Opcode = Instruction::UserOp1; + if (const Instruction *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks; it's possible we haven't + // visited them yet, so the instructions may not yet be assigned + // virtual registers. + if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) + return false; + + Opcode = I->getOpcode(); + U = I; + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + switch (Opcode) { + default: + //errs() << "Failing Opcode is: " << *Op1 << "\n"; + break; + case Instruction::Alloca: { + assert(false && "Alloca should have been handled earlier!"); + return false; + } + } + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { + //errs() << "Failing GV is: " << GV << "\n"; + (void)GV; + return false; + } + + // Try to get this in a register if nothing else has worked. + Reg = getRegForValue(Obj); + if (Reg == 0) return false; + + // Since the offset may be too large for the load instruction + // get the reg+offset into a register. + // TODO: Verify the additions work, otherwise we'll need to add the + // offset instead of 0 to the instructions and do all sorts of operand + // munging. + // TODO: Optimize this somewhat. + if (Offset != 0) { + ARMCC::CondCodes Pred = ARMCC::AL; + unsigned PredReg = 0; + + if (!isThumb) + emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + Reg, Reg, Offset, Pred, PredReg, + static_cast<const ARMBaseInstrInfo&>(TII)); + else { + assert(AFI->isThumb2Function()); + emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + Reg, Reg, Offset, Pred, PredReg, + static_cast<const ARMBaseInstrInfo&>(TII)); + } + } + + return true; +} + +bool ARMFastISel::ARMLoadAlloca(const Instruction *I) { + Value *Op0 = I->getOperand(0); + + // Verify it's an alloca. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned ResultReg = createResultReg(RC); + TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, + ResultReg, SI->second, RC, + TM.getRegisterInfo()); + UpdateValueMap(I, ResultReg); + return true; + } + } + return false; +} + +bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, + unsigned Reg, int Offset) { + + assert(VT.isSimple() && "Non-simple types are invalid here!"); + unsigned Opc; + + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(false && "Trying to emit for an unhandled type!"); + return false; + case MVT::i16: + Opc = isThumb ? ARM::tLDRH : ARM::LDRH; + VT = MVT::i32; + break; + case MVT::i8: + Opc = isThumb ? ARM::tLDRB : ARM::LDRB; + VT = MVT::i32; + break; + case MVT::i32: + Opc = isThumb ? ARM::tLDR : ARM::LDR; + break; + } + + ResultReg = createResultReg(TLI.getRegClassFor(VT)); + + // TODO: Fix the Addressing modes so that these can share some code. + // Since this is a Thumb1 load this will work in Thumb1 or 2 mode. 
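+  // Note the operand-order difference below: the Thumb form takes
+  // (base, imm, offset-reg) while the ARM addrmode2 form takes
+  // (base, offset-reg, imm); the zero register disables the
+  // register-offset part in both cases.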
+  if (isThumb)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(Opc), ResultReg)
+                    .addReg(Reg).addImm(Offset).addReg(0));
+  else
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(Opc), ResultReg)
+                    .addReg(Reg).addReg(0).addImm(Offset));
+
+  return true;
+}
+
+bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
+  Value *Op1 = I->getOperand(1);
+
+  // Verify it's an alloca.
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+
+    if (SI != FuncInfo.StaticAllocaMap.end()) {
+      TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+      assert(SrcReg != 0 && "Nothing to store!");
+      TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+                              SrcReg, true /*isKill*/, SI->second, RC,
+                              TM.getRegisterInfo());
+      return true;
+    }
+  }
+  return false;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
+                               unsigned DstReg, int Offset) {
+  unsigned StrOpc;
+  switch (VT.getSimpleVT().SimpleTy) {
+    default: return false;
+    case MVT::i1:
+    case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
+    case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
+    case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+    case MVT::f32:
+      if (!Subtarget->hasVFP2()) return false;
+      StrOpc = ARM::VSTRS;
+      break;
+    case MVT::f64:
+      if (!Subtarget->hasVFP2()) return false;
+      StrOpc = ARM::VSTRD;
+      break;
+  }
+
+  if (isThumb)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(StrOpc), SrcReg)
+                    .addReg(DstReg).addImm(Offset).addReg(0));
+  else
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(StrOpc), SrcReg)
+                    .addReg(DstReg).addReg(0).addImm(Offset));
+
+  return true;
+}
+
+bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+  Value *Op0 = I->getOperand(0);
+  unsigned SrcReg = 0;
+
+  // Verify we have a legal type for the stored value before going further.
+  EVT VT;
+  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+    return false;
+
+  // Get the value to be stored into a register.
+  SrcReg = getRegForValue(Op0);
+  if (SrcReg == 0)
+    return false;
+
+  // If we're an alloca we know we have a frame index and can emit the store
+  // quickly.
+  if (ARMStoreAlloca(I, SrcReg))
+    return true;
+
+  // Our register and offset with innocuous defaults.
+  unsigned Reg = 0;
+  int Offset = 0;
+
+  // See if we can handle this as Reg + Offset.
+  if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
+    return false;
+
+  if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
+
+  return true;
+}
+
+bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
+  // If we're an alloca we know we have a frame index and can emit the load
+  // directly in short order.
+  if (ARMLoadAlloca(I))
+    return true;
+
+  // Verify we have a legal type before going any further.
+  EVT VT;
+  if (!isLoadTypeLegal(I->getType(), VT))
+    return false;
+
+  // Our register and offset with innocuous defaults.
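+  // (ARMComputeRegOffset folds any non-zero offset into Reg, which is
+  // why the emit helpers below are handed "Offset /* 0 */".)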
+  unsigned Reg = 0;
+  int Offset = 0;
+
+  // See if we can handle this as Reg + Offset.
+  if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+    return false;
+
+  unsigned ResultReg;
+  if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+  const BranchInst *BI = cast<BranchInst>(I);
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+  // Simple branch support: compare the condition against zero and branch
+  // to the true block if it is set.
+  unsigned CondReg = getRegForValue(BI->getCondition());
+  if (CondReg == 0) return false;
+
+  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
+  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(CondReg).addImm(0));
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+    .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  FastEmitBranch(FBB, DL);
+  FuncInfo.MBB->addSuccessor(TBB);
+  return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+  // No Thumb-1 for now.
+  if (isThumb && !AFI->isThumb2Function()) return false;
+
+  switch (I->getOpcode()) {
+    case Instruction::Load:
+      return ARMSelectLoad(I);
+    case Instruction::Store:
+      return ARMSelectStore(I);
+    case Instruction::Br:
+      return ARMSelectBranch(I);
+    default: break;
+  }
+  return false;
+}
+
+namespace llvm {
+  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+    if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+    return 0;
+  }
+}
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 0000000000000..85b0c6c248d02
--- /dev/null
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,212 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals merged into the single big one can be addressed using offsets from
+// the same base pointer (there is no need for a separate base pointer for
+// each of them). Such a transformation can significantly reduce register
+// pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//    foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM, the addresses of all 3 arrays must be kept in registers, so this
+// code has fairly high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+//   int foo[N];
+//   int bar[N];
+//   int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+//    merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
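+//
+// The grouping performed by doMerge below is a simple greedy pass over the
+// size-sorted globals. As a rough standalone sketch of just that packing
+// logic (plain sizes instead of GlobalVariables, and kMaxOffset standing in
+// for TLI->getMaximalGlobalOffset()):
+//
+//   #include <cstddef>
+//   #include <vector>
+//
+//   std::vector<std::vector<size_t> >
+//   packGlobals(const std::vector<size_t> &Sizes, size_t kMaxOffset) {
+//     // Sizes is assumed already sorted, as doMerge sorts its globals.
+//     std::vector<std::vector<size_t> > Groups;
+//     for (size_t i = 0, e = Sizes.size(); i != e; ) {
+//       size_t j = i;
+//       size_t MergedSize = 0;
+//       std::vector<size_t> Group;
+//       // Keep packing while the running size is still under the limit;
+//       // like doMerge, this admits the element that crosses it.
+//       for (; MergedSize < kMaxOffset && j != e; ++j) {
+//         Group.push_back(Sizes[j]);
+//         MergedSize += Sizes[j];
+//       }
+//       Groups.push_back(Group);
+//       i = j;
+//     }
+//     return Groups;
+//   }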
+// ===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-global-merge" +#include "ARM.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Attributes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { + class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// target type sizes. + const TargetLowering *TLI; + + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool) const; + + public: + static char ID; // Pass identification, replacement for typeid. + explicit ARMGlobalMerge(const TargetLowering *tli) + : FunctionPass(ID), TLI(tli) {} + + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function& F); + + const char *getPassName() const { + return "Merge internal globals"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + } + + struct GlobalCmp { + const TargetData *TD; + + GlobalCmp(const TargetData *td): + TD(td) { } + + bool operator() (const GlobalVariable* GV1, + const GlobalVariable* GV2) { + const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + + return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); + } + }; + }; +} // end anonymous namespace + +char ARMGlobalMerge::ID = 0; + +bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool isConst) const { + const TargetData *TD = TLI->getTargetData(); + + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions) + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + + // FIXME: Find better heuristics + std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); + + const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + + for (size_t i = 0, e = Globals.size(); i != e; ) { + size_t j = 0; + uint64_t MergedSize = 0; + std::vector<const Type*> Tys; + std::vector<Constant*> Inits; + for (j = i; MergedSize < MaxOffset && j != e; ++j) { + const Type* Ty = Globals[j]->getType()->getElementType(); + Tys.push_back(Ty); + Inits.push_back(Globals[j]->getInitializer()); + MergedSize += TD->getTypeAllocSize(Ty); + } + + StructType* MergedTy = StructType::get(M.getContext(), Tys); + Constant* MergedInit = ConstantStruct::get(MergedTy, Inits); + GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst, + GlobalValue::InternalLinkage, + MergedInit, "merged"); + for (size_t k = i; k < j; ++k) { + SmallVector<Constant*, 2> Idx; + Idx.push_back(ConstantInt::get(Int32Ty, 0)); + Idx.push_back(ConstantInt::get(Int32Ty, k-i)); + + Constant* GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedGV, + &Idx[0], Idx.size()); + + Globals[k]->replaceAllUsesWith(GEP); + Globals[k]->eraseFromParent(); + } + i = j; + } + + return true; +} + + +bool ARMGlobalMerge::doInitialization(Module& M) { + SmallVector<GlobalVariable*, 16> Globals, ConstGlobals; + const TargetData *TD = TLI->getTargetData(); + unsigned MaxOffset = TLI->getMaximalGlobalOffset(); + bool Changed 
= false; + + // Grab all non-const globals. + for (Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + // Merge is safe for "normal" internal globals only + if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) + continue; + + // Ignore fancy-aligned globals for now. + if (I->getAlignment() != 0) + continue; + + // Ignore all 'special' globals. + if (I->getName().startswith("llvm.") || + I->getName().startswith(".llvm.")) + continue; + + if (TD->getTypeAllocSize(I->getType()) < MaxOffset) { + if (I->isConstant()) + ConstGlobals.push_back(I); + else + Globals.push_back(I); + } + } + + if (Globals.size() > 1) + Changed |= doMerge(Globals, M, false); + // FIXME: This currently breaks the EH processing due to way how the + // typeinfo detection works. We might want to detect the TIs and ignore + // them in the future. + + // if (ConstGlobals.size() > 1) + // Changed |= doMerge(ConstGlobals, M, true); + + return Changed; +} + +bool ARMGlobalMerge::runOnFunction(Function& F) { + return false; +} + +FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { + return new ARMGlobalMerge(tli); +} diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c84d3ff813248..51a30c158dd1b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -36,6 +36,11 @@ using namespace llvm; +static cl::opt<bool> +DisableShifterOp("disable-shifter-op", cl::Hidden, + cl::desc("Disable isel of shifter-op"), + cl::init(false)); + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -113,6 +118,16 @@ public: bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + inline bool Pred_so_imm(SDNode *inN) const { + ConstantSDNode *N = cast<ConstantSDNode>(inN); + return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; + } + + inline bool Pred_t2_so_imm(SDNode *inN) const { + ConstantSDNode *N = cast<ConstantSDNode>(inN); + return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1; + } + // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" @@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue &BaseReg, SDValue &ShReg, SDValue &Opc) { + if (DisableShifterOp) + return false; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); // Don't match base register only case. That is matched to a separate @@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Mode) { Addr = N; - Mode = CurDAG->getTargetConstant(0, MVT::i32); + Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32); return true; } @@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, SDValue &Opc) { + if (DisableShifterOp) + return false; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); // Don't match base register only case. That is matched to a separate @@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } + EVT ResTy; + if (NumVecs == 1) + ResTy = VT; + else { + unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); + } + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue SuperReg; if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(NumVecs, VT); - ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - if (NumVecs < 2) + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); + if (NumVecs == 1) return VLd; - SDValue RegSeq; - SDValue V0 = SDValue(VLd, 0); - SDValue V1 = SDValue(VLd, 1); - - // Form a REG_SEQUENCE to force register allocation. - if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - else { - SDValue V2 = SDValue(VLd, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLd, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - + SuperReg = SDValue(VLd, 0); assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, - dl, VT, RegSeq); + dl, VT, SuperReg); ReplaceUses(SDValue(N, Vec), D); } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); return NULL; } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { // Quad registers are directly supported for VLD1 and VLD2, // loading pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - std::vector<EVT> ResTys(2 * NumVecs, RegVT); - ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - Chain = SDValue(VLd, 2 * NumVecs); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); + if (NumVecs == 1) + return VLd; + + SuperReg = SDValue(VLd, 0); + Chain = SDValue(VLd, 1); - // Combine the even and odd subregs to produce the result. - if (NumVecs == 1) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); - ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); - } else { - SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, - SDValue(VLd, 0), SDValue(VLd, 1), - SDValue(VLd, 2), SDValue(VLd, 3)), 0); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); - } } else { // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - - std::vector<EVT> ResTys(NumVecs, RegVT); - ResTys.push_back(MemAddr.getValueType()); - ResTys.push_back(MVT::Other); + EVT AddrTy = MemAddr.getValueType(); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); - Chain = SDValue(VLdA, NumVecs+1); + SDValue ImplDef = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); + const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; + SDNode *VLdA = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7); + Chain = SDValue(VLdA, 2); // Load the odd subregs. 
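+      // The even-half load starts from an IMPLICIT_DEF of the wide result
+      // type; its result 0 is the partially-filled super-register and its
+      // result 1 the updated address, and both are threaded into the
+      // odd-half load below.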
Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), - Align, Reg0, Pred, Reg0, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); - Chain = SDValue(VLdB, NumVecs+1); - - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } + const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0), + Pred, Reg0, Chain }; + SDNode *VLdB = + CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7); + SuperReg = SDValue(VLdB, 0); + Chain = SDValue(VLdB, 2); + } + + // Extract out the Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, SuperReg); + ReplaceUses(SDValue(N, Vec), Q); } ReplaceUses(SDValue(N, NumVecs), Chain); return NULL; @@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 10> Ops; + SmallVector<SDValue, 7> Ops; Ops.push_back(MemAddr); Ops.push_back(Align); if (is64BitVector) { - if (NumVecs >= 2) { + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, : N->getOperand(3+3); RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + Ops.push_back(RegSeq); } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = DOpcodes[OpcodeIndex]; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } - EVT RegVT = GetNEONSubregVT(VT); if (NumVecs <= 2) { - // Quad registers are directly supported for VST1 and VST2, - // storing pairs of D regs. + // Quad registers are directly supported for VST1 and VST2. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (NumVecs == 2) { - // First extract the pair of Q registers. + if (NumVecs == 1) { + Ops.push_back(N->getOperand(3)); + } else { + // Form a QQ register. 
SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); - - // Form a QQ register. - SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT, - QQ)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT, - QQ)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); - } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3))); - } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); + Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0)); } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(3+3); + SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); Ops.push_back(Reg0); // post-access address offset - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); + Ops.push_back(RegSeq); Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStA, 1); // Store the odd D registers. 
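+  // The second store reuses the address produced by the first (result 0 of
+  // VStA) and the same QQQQ register sequence; only the opcode and the
+  // chain operand change.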
Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; + Ops[6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); + MVT::Other, Ops.data(), 7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, if (!T) return 0; - if (Predicate_t2_so_imm(TrueVal.getNode())) { + if (Pred_t2_so_imm(TrueVal.getNode())) { SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; @@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, if (!T) return 0; - if (Predicate_so_imm(TrueVal.getNode())) { + if (Pred_so_imm(TrueVal.getNode())) { SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; @@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { } // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<<P:Predicate_so_imm>>:$true, + // (imm:i32)<<P:Pred_so_imm>>:$true, // (imm:i32):$cc) // Emits: (MOVCCi:i32 GPR:i32:$false, // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) @@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ResNode = SelectARMIndexedLoad(N); if (ResNode) return ResNode; - - // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value. - if (Subtarget->hasVFP2() && - N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) { - SDValue Chain = N->getOperand(0); - SDValue AM5Opc = - CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = getAL(CurDAG); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl, - MVT::v2f64, MVT::Other, Ops, 5); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - return Ret; - } - // Other cases are autogenerated. - break; - } - case ISD::STORE: { - // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value. - if (Subtarget->hasVFP2() && - N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) { - SDValue Chain = N->getOperand(0); - SDValue AM5Opc = - CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = getAL(CurDAG); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); - SDValue Ops[] = { N->getOperand(1), N->getOperand(2), - AM5Opc, Pred, PredReg, Chain }; - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); - SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); - cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); - return Ret; - } // Other cases are autogenerated. 
break; } @@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, - ARM::VLD1q32, ARM::VLD1q64 }; + unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, + ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; return SelectVLD(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, - ARM::VLD2d32, ARM::VLD1q64 }; - unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; + unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, + ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, + ARM::VLD2q32Pseudo }; return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { - unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, - ARM::VLD3d32, ARM::VLD1d64T }; - unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, - ARM::VLD3q16_UPD, - ARM::VLD3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, - ARM::VLD3q16odd_UPD, - ARM::VLD3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, + ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { - unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, - ARM::VLD4d32, ARM::VLD1d64Q }; - unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, - ARM::VLD4q16_UPD, - ARM::VLD4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, - ARM::VLD4q16odd_UPD, - ARM::VLD4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, + ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, - ARM::VST1q32, ARM::VST1q64 }; + unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, + ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; return SelectVST(N, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST1q64 }; - unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; + unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, + ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; return SelectVST(N, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { - unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, - ARM::VST3d32, ARM::VST1d64T }; - unsigned QOpcodes0[] = { ARM::VST3q8_UPD, - ARM::VST3q16_UPD, - ARM::VST3q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, - ARM::VST3q16odd_UPD, - ARM::VST3q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, + ARM::VST3d32Pseudo, ARM::VST1d64TPseudo }; + unsigned QOpcodes0[] = { 
ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { - unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, - ARM::VST4d32, ARM::VST1d64Q }; - unsigned QOpcodes0[] = { ARM::VST4q8_UPD, - ARM::VST4q16_UPD, - ARM::VST4q32_UPD }; - unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, - ARM::VST4q16odd_UPD, - ARM::VST4q32odd_UPD }; + unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo, + ARM::VST4d32Pseudo, ARM::VST1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, + ARM::VST4q16oddPseudo_UPD, + ARM::VST4q32oddPseudo_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0091df753eb78..ce4a2c90689c2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> EnableARMTailCalls("arm-tail-calls", cl::Hidden, cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(true)); + cl::init(false)); + +// This option should go away when Machine LICM is smart enough to hoist a +// reg-to-reg VDUP. +static cl::opt<bool> +EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden, + cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."), + cl::init(false)); static cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, @@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); } + setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { @@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); + RegInfo = TM.getRegisterInfo(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. @@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::GPRRegisterClass); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); - addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + if (!Subtarget->isFPOnlySP()) + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); setTruncStoreAction(MVT::f64, MVT::f32, Expand); } @@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); + // Custom handling for some quad-vector types to detect VMULL. 
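+  // (Presumably the custom lowering pattern-matches a multiply of
+  // sign/zero-extended operands into the ARMISD::VMULLs / VMULLu nodes
+  // introduced elsewhere in this patch.)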
+    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
@@ -410,12 +427,10 @@
   // doesn't yet know how to not do that for SjLj.
   setExceptionSelectorRegister(ARM::R0);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
-  // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
-  // use the default expansion.
-  bool canHandleAtomics =
-    (Subtarget->hasV7Ops() ||
-     (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
-  if (canHandleAtomics) {
+  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+  // the default expansion.
+  if (Subtarget->hasDataBarrier() ||
+      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
     // membarrier needs custom lowering; the rest are legal and handled
     // normally.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -466,10 +481,12 @@
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
     setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+  }
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -481,9 +498,9 @@
   setOperationAction(ISD::SETCC, MVT::i32, Expand);
   setOperationAction(ISD::SETCC, MVT::f32, Expand);
   setOperationAction(ISD::SETCC, MVT::f64, Expand);
-  setOperationAction(ISD::SELECT, MVT::i32, Expand);
-  setOperationAction(ISD::SELECT, MVT::f32, Expand);
-  setOperationAction(ISD::SELECT, MVT::f64, Expand);
+  setOperationAction(ISD::SELECT, MVT::i32, Custom);
+  setOperationAction(ISD::SELECT, MVT::f32, Custom);
+  setOperationAction(ISD::SELECT, MVT::f64, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -530,6 +547,9 @@
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::MUL);
+  if (Subtarget->hasV6T2Ops())
+    setTargetDAGCombine(ISD::OR);
+
   setStackPointerRegisterToSaveRestore(ARM::SP);
   if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -547,6 +567,37 @@
   benefitFromCodePlacementOpt = true;
 }
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const {
+  const TargetRegisterClass *RRC = 0;
+  uint8_t Cost = 1;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return TargetLowering::findRepresentativeClass(VT);
+  // Use DPR as the representative register class for all floating point
+  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
+  // the cost is 1 for both f32 and f64.
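+  // The cost is a relative pressure weight in DPR units: one D register
+  // costs 1, a Q register 2, a QQ register 4, and a QQQQ register 8.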
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: + case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: + RRC = ARM::DPRRegisterClass; + break; + case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v4f32: case MVT::v2f64: + RRC = ARM::DPRRegisterClass; + Cost = 2; + break; + case MVT::v4i64: + RRC = ARM::DPRRegisterClass; + Cost = 4; + break; + case MVT::v8i64: + RRC = ARM::DPRRegisterClass; + Cost = 8; + break; + } + return std::make_pair(RRC, Cost); +} + const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; @@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::AND: return "ARMISD::AND"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; @@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VMULLs: return "ARMISD::VMULLs"; + case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; + case ARMISD::BFI: return "ARMISD::BFI"; } } @@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { return TargetLowering::getRegClassFor(VT); } +// Create a fast isel object. +FastISel * +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return ARM::createFastISel(funcInfo); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; } +/// getMaximalGlobalOffset - Returns the maximal possible offset which can +/// be used for loads / stores from the global. +unsigned ARMTargetLowering::getMaximalGlobalOffset() const { + return (Subtarget->isThumb1Only() ? 127 : 4095); +} + Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) @@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } +unsigned +ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return RegInfo->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = RegInfo->hasFP(MF) ? 1 : 0; + return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
+ case ARM::DPRRegClassID: + return 32 - 10; + } +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCState &State, bool CanFail) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; - unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { // For the 2nd half of a v2f64, do not just fail. if (CanFail) @@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, if (HiRegList[i] == Reg) break; + unsigned T = State.AllocateReg(LoRegList[i]); + (void)T; + assert(T == LoRegList[i] && "Could not allocate register"); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], LocVT, LocInfo)); @@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } +unsigned ARMTargetLowering::getJumpTableEncoding() const { + return MachineJumpTableInfo::EK_Inline; +} + SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); - // v6 and v7 can both handle barriers directly, but need handled a bit - // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should + // Some subtargets which have dmb and dsb instructions can handle barriers + // directly. Some ARMv6 CPUs can support them with the help of the mcr + // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should // never get here. unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; - if (Subtarget->hasV7Ops()) + if (Subtarget->hasDataBarrier()) return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); - else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + else { + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && + "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); - assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return SDValue(); + } } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); - DebugLoc dl = Node->getDebugLoc(); - EVT VT = Node->getValueType(0); - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - SDValue Align = Op.getOperand(2); - - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - - unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue(); - unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); - if (AlignVal > StackAlign) - // Do this now since selection pass cannot introduce new target - // independent node. - Align = DAG.getConstant(-(uint64_t)AlignVal, VT); - - // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up - // using a "add r, sp, r" instead. Negate the size now so we don't have to - // do even more horrible hack later. - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (AFI->isThumb1OnlyFunction()) { - bool Negate = true; - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size); - if (C) { - uint32_t Val = C->getZExtValue(); - if (Val <= 508 && ((Val & 3) == 0)) - Negate = false; - } - if (Negate) - Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); - } - - SDVTList VTList = DAG.getVTList(VT, MVT::Other); - SDValue Ops1[] = { Chain, Size, Align }; - SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); - Chain = Res.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue()); - SDValue Ops2[] = { Res, Chain }; - return DAG.getMergeValues(Ops2, 2, dl); -} - -SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, DebugLoc dl) const { @@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, default: break; case ISD::SETLT: case ISD::SETGE: - if (isLegalICmpImmediate(C-1)) { + if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: - if (C > 0 && isLegalICmpImmediate(C-1)) { + if (C != 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: - if (isLegalICmpImmediate(C+1)) { + if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C+1, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: - if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { + if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C+1, MVT::i32); } @@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } +SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = Op.getOperand(0); + SDValue SelectTrue = Op.getOperand(1); + SDValue SelectFalse = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Convert: + // + // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) + // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) + // + if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { + const ConstantSDNode *CMOVTrue = + dyn_cast<ConstantSDNode>(Cond.getOperand(0)); + const ConstantSDNode *CMOVFalse = + dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + + if (CMOVTrue && CMOVFalse) { + unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); + unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); + + SDValue True; + SDValue False; + if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { + True = SelectTrue; + False = SelectFalse; + } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { + True = SelectFalse; + False = SelectTrue; + } + + if (True.getNode() && False.getNode()) { + EVT VT = Cond.getValueType(); + SDValue ARMcc = Cond.getOperand(2); + SDValue CCR = Cond.getOperand(3); + SDValue Cmp = Cond.getOperand(4); + return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + } + } + } + + return DAG.getSelectCC(dl, Cond, + DAG.getConstant(0, Cond.getValueType()), + SelectTrue, SelectFalse, ISD::SETNE); +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { bool SeenZero = false; if (canChangeToInt(LHS, SeenZero, Subtarget) && canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case. - (FiniteOnlyFPMath() || SeenZero)) { + // If one of the operands is zero, it's safe to ignore the NaN case since + // we only care about equality comparisons. + (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. @@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass); + unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } @@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } +SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { + // The rounding mode is in bits 23:22 of the FPSCR. + // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0. + // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3) + // so that the shift and the AND get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc(); + SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, + DAG.getConstant(Intrinsic::arm_get_fpscr, + MVT::i32)); + SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, + DAG.getConstant(1U << 22, MVT::i32)); + SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, + DAG.getConstant(22, MVT::i32)); + return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + DAG.getConstant(3, MVT::i32)); +} + static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; + + // Assume that the first shuffle index is not UNDEF. Fail if it is. + if (M[0] < 0) + return false; + Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first @@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, ReverseVEXT = true; } + if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast<unsigned>(M[i])) return false; } @@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { - if ((unsigned) M[i] != - (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) + if (M[i] < 0) continue; // ignore UNDEF indices + if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } @@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { - if ((unsigned) M[i] != i + WhichResult || - (unsigned) M[i+1] != i + NumElts + WhichResult) + if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || + (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) return false; } return true; @@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { - if ((unsigned) M[i] != i + WhichResult || - (unsigned) M[i+1] != i + WhichResult) + if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || + (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) return false; } return true; @@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i != NumElts; ++i) { + if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != 2 * i + WhichResult) return false; } @@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, for (unsigned j = 0; j != 2; ++j) { unsigned Idx = WhichResult; for (unsigned i = 0; i != Half; ++i) { - if ((unsigned) M[i + j * Half] != Idx) + int MIdx = M[i + j * Half]; + if (MIdx >= 0 && (unsigned) MIdx != Idx) return false; Idx += 2; } @@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, WhichResult = (M[0] == 0 ? 
0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { - if ((unsigned) M[i] != Idx || - (unsigned) M[i+1] != Idx + NumElts) + if ((M[i] >= 0 && (unsigned) M[i] != Idx) || + (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) return false; Idx += 1; } @@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { - if ((unsigned) M[i] != Idx || - (unsigned) M[i+1] != Idx) + if ((M[i] >= 0 && (unsigned) M[i] != Idx) || + (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) return false; Idx += 1; } @@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } +// If N is an integer constant that can be moved into a register in one +// instruction, return an SDValue of such a constant (will become a MOV +// instruction). Otherwise return null. +static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, + const ARMSubtarget *ST, DebugLoc dl) { + uint64_t Val; + if (!isa<ConstantSDNode>(N)) + return SDValue(); + Val = cast<ConstantSDNode>(N)->getZExtValue(); + + if (ST->isThumb1Only()) { + if (Val <= 255 || ~Val <= 255) + return DAG.getConstant(Val, MVT::i32); + } else { + if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) + return DAG.getConstant(Val, MVT::i32); + } + return SDValue(); +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. -static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - // If all elements are constants, fall back to the default expansion, which - // will generate a load from the constant pool. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + + if (EnableARMVDUPsplat) { + // Use VDUP for non-constant splats. For f32 constant splats, reduce to + // i32 and try again. + if (usesOnlyOneValue && EltSize <= 32) { + if (!isConstant) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (VT.getVectorElementType().isFloatingPoint()) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, + Op.getOperand(i))); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], + NumElts); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + LowerBUILD_VECTOR(Val, DAG, ST)); + } + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + } + } + + // If all elements are constants and the case above didn't get hit, fall back + // to the default expansion, which will generate a load from the constant + // pool. if (isConstant) return SDValue(); - // Use VDUP for non-constant splats. - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (usesOnlyOneValue && EltSize <= 32) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (!EnableARMVDUPsplat) { + // Use VDUP for non-constant splats. 
+ if (usesOnlyOneValue && EltSize <= 32) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + } // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands @@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } +/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or +/// an extending load, return the unextended value. +static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) + return N->getOperand(0); + LoadSDNode *LD = cast<LoadSDNode>(N); + return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getSrcValue(), + LD->getSrcValueOffset(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); +} + +static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { + // Multiplications are only custom-lowered for 128-bit vectors so that + // VMULL can be detected. Otherwise v2i64 multiplications are not legal. + EVT VT = Op.getValueType(); + assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); + SDNode *N0 = Op.getOperand(0).getNode(); + SDNode *N1 = Op.getOperand(1).getNode(); + unsigned NewOpc = 0; + if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) && + (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) { + NewOpc = ARMISD::VMULLs; + } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) && + (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) { + NewOpc = ARMISD::VMULLu; + } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) { + // Fall through to expand this. It is not legal. + return SDValue(); + } else { + // Other vector multiplications are legal. + return Op; + } + + // Legalize to a VMULL instruction. + DebugLoc DL = Op.getDebugLoc(); + SDValue Op0 = SkipExtension(N0, DAG); + SDValue Op1 = SkipExtension(N1, DAG); + + assert(Op0.getValueType().is64BitVector() && + Op1.getValueType().is64BitVector() && + "unexpected types for extended operands to VMULL"); + return DAG.getNode(NewOpc, DL, VT, Op0, Op1); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: @@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::VSETCC: return LowerVSETCC(Op, DAG); - case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); } return SDValue(); } @@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } - - case ARM::tANDsp: - case ARM::tADDspr_: - case ARM::tSUBspi_: - case ARM::t2SUBrSPi_: - case ARM::t2SUBrSPi12_: - case ARM::t2SUBrSPs_: { - MachineFunction *MF = BB->getParent(); - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - bool DstIsDead = MI->getOperand(0).isDead(); - bool SrcIsKill = MI->getOperand(1).isKill(); - - if (SrcReg != ARM::SP) { - // Copy the source to SP from virtual register. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); - unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) - ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; - BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - } - - unsigned OpOpc = 0; - bool NeedPred = false, NeedCC = false, NeedOp3 = false; - switch (MI->getOpcode()) { - default: - llvm_unreachable("Unexpected pseudo instruction!"); - case ARM::tANDsp: - OpOpc = ARM::tAND; - NeedPred = true; - break; - case ARM::tADDspr_: - OpOpc = ARM::tADDspr; - break; - case ARM::tSUBspi_: - OpOpc = ARM::tSUBspi; - break; - case ARM::t2SUBrSPi_: - OpOpc = ARM::t2SUBrSPi; - NeedPred = true; NeedCC = true; - break; - case ARM::t2SUBrSPi12_: - OpOpc = ARM::t2SUBrSPi12; - NeedPred = true; - break; - case ARM::t2SUBrSPs_: - OpOpc = ARM::t2SUBrSPs; - NeedPred = true; NeedCC = true; NeedOp3 = true; - break; - } - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); - if (OpOpc == ARM::tAND) - AddDefaultT1CC(MIB); - MIB.addReg(ARM::SP); - MIB.addOperand(MI->getOperand(2)); - if (NeedOp3) - MIB.addOperand(MI->getOperand(3)); - if (NeedPred) - AddDefaultPred(MIB); - if (NeedCC) - AddDefaultCC(MIB); - - // Copy the result from SP to virtual register. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); - unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) - ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; - BuildMI(*BB, MI, dl, TII->get(CopyOpc)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(ARM::SP); - MI->eraseFromParent(); // The pseudo instruction is gone now. 
- return BB; - } } } @@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, return SDValue(); } -/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. -static SDValue PerformADDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // added by evan in r37685 with no testcase. - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - +/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with +/// operands N0 and N1. This is a helper for PerformADDCombine that is +/// called with the default operands, and if that fails, with commuted +/// operands. +static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI) { // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { - SDValue Result = combineSelectAndUse(N, N1, N0, DCI); - if (Result.getNode()) return Result; - } - return SDValue(); } +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +/// +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // First try with the default operand order. + SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); + if (Result.getNode()) + return Result; + + // If that didn't work, try again with the operands commuted. + return PerformADDCombineWithOperands(N, N1, N0, DCI); +} + /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +/// static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // added by evan in r37685 with no testcase. - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { @@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. + + // BFI is only available on V6T2+ + if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + // 1) or (and A, mask), val => ARMbfi A, val, mask + // iff (val & mask) == val + // + // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask + // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) + // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) + // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // (i.e., copy a bitfield value into another bitfield of the same width) + if (N0.getOpcode() != ISD::AND) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + + // The value and the mask need to be constants so we can verify this is + // actually a bitfield set. 
If the mask is 0xffff, we can do better + // via a movt instruction, so don't use BFI in that case. + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C) + return SDValue(); + unsigned Mask = C->getZExtValue(); + if (Mask == 0xffff) + return SDValue(); + SDValue Res; + // Case (1): or (and A, mask), val => ARMbfi A, val, mask + if ((C = dyn_cast<ConstantSDNode>(N1))) { + unsigned Val = C->getZExtValue(); + if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) + return SDValue(); + Val >>= CountTrailingZeros_32(~Mask); + + Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), + DAG.getConstant(Val, MVT::i32), + DAG.getConstant(Mask, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } else if (N1.getOpcode() == ISD::AND) { + // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask + C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!C) + return SDValue(); + unsigned Mask2 = C->getZExtValue(); + + if (ARM::isBitFieldInvertedMask(Mask) && + ARM::isBitFieldInvertedMask(~Mask2) && + (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { + // The pack halfword instruction works better for masks that fit it, + // so use that when it's available. + if (Subtarget->hasT2ExtractPack() && + (Mask == 0xffff || Mask == 0xffff0000)) + return SDValue(); + // 2a + unsigned lsb = CountTrailingZeros_32(Mask2); + Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), + DAG.getConstant(lsb, MVT::i32)); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, + DAG.getConstant(Mask, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } else if (ARM::isBitFieldInvertedMask(~Mask) && + ARM::isBitFieldInvertedMask(Mask2) && + (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { + // The pack halfword instruction works better for masks that fit it, + // so use that when it's available. + if (Subtarget->hasT2ExtractPack() && + (Mask2 == 0xffff || Mask2 == 0xffff0000)) + return SDValue(); + // 2b + unsigned lsb = CountTrailingZeros_32(Mask); + Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(lsb, MVT::i32)); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, + DAG.getConstant(Mask2, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } + } + + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { // If the target supports NEON, try to use vmax/vmin instructions for f32 - // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, + // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is // a NaN; only do the transformation when it matches that behavior. 
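Stepping back to the BFI combine above: the following minimal standalone sketch sanity-checks the case (1) rewrite. It is not the in-tree code; isBitFieldInvertedMask restates the ARM::isBitFieldInvertedMask helper added later in this patch, while bfiRef and the constants in main are hypothetical, chosen only to illustrate that when Mask is a bit-field inverted mask and (Val & ~Mask) == Val, the expression (A & Mask) | Val is exactly a bit-field insert of Val >> lsb into A.

#include <cassert>
#include <cstdint>

// Restatement of the isBitFieldInvertedMask predicate used by the combine:
// 1's may appear only on the "outsides"; the "inside" run of bits must be 0's.
static bool isBitFieldInvertedMask(uint32_t v) {
  if (v == 0xffffffffu)
    return false;
  unsigned lsb = 0, msb = 31;
  while (v & (1u << msb)) --msb;       // skip high "outside" 1's
  while (v & (1u << lsb)) ++lsb;       // skip low "outside" 1's
  for (unsigned i = lsb; i <= msb; ++i)
    if (v & (1u << i))
      return false;                    // a 1 inside the field breaks the shape
  return true;
}

// Assumed reference semantics for "ARMbfi A, Src, Mask": keep the bits of A
// selected by Mask and insert Src into the contiguous zero field.
static uint32_t bfiRef(uint32_t A, uint32_t Src, uint32_t Mask) {
  unsigned lsb = 0;
  while (Mask & (1u << lsb)) ++lsb;    // first zero bit is the field's lsb
  return (A & Mask) | ((Src << lsb) & ~Mask);
}

int main() {
  const uint32_t Mask = 0xff0000ffu;   // zeros in bits 8..23
  const uint32_t A = 0xdeadbeefu;
  const uint32_t Val = 0x00123400u;    // satisfies (Val & ~Mask) == Val
  assert(isBitFieldInvertedMask(Mask));
  const unsigned lsb = 8;              // CountTrailingZeros_32(~Mask)
  assert(((A & Mask) | Val) == bfiRef(A, Val >> lsb, Mask));
  return 0;
}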
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); + case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); @@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } +bool ARM::isBitFieldInvertedMask(unsigned v) { + if (v == 0xffffffff) + return false; + // There can be 1's on either or both "outsides"; all the "inside" + // bits must be 0's. + unsigned int lsb = 0, msb = 31; + while (v & (1 << msb)) --msb; + while (v & (1 << lsb)) ++lsb; + for (unsigned int i = lsb; i <= msb; ++i) { + if (v & (1 << i)) + return false; + } + return true; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 128b72e1e743e..ba9ea7f15e7b2 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -17,6 +17,8 @@ #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" #include <vector> @@ -45,6 +47,8 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. + AND, // ARM "and" instruction that sets the 's' flag in CPSR. + CMP, // ARM compare instructions. CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. @@ -80,7 +84,7 @@ namespace llvm { MEMBARRIER, // Memory barrier SYNCBARRIER, // Memory sync barrier - + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -141,6 +145,10 @@ namespace llvm { VUZP, // unzip (deinterleave) VTRN, // transpose + // Vector multiply long: + VMULLs, // ...signed + VMULLu, // ...unsigned + // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other // operations, but for ARM some BUILD_VECTORs are legal as-is and their @@ -150,7 +158,10 @@ // Floating-point max and min: FMAX, - FMIN + FMIN, + + // Bit-field insert + BFI }; } @@ -162,6 +173,7 @@ /// returns -1. int getVFPf32Imm(const APFloat &FPImm); int getVFPf64Imm(const APFloat &FPImm); + bool isBitFieldInvertedMask(unsigned v); } //===--------------------------------------------------------------------===// @@ -171,6 +183,8 @@ public: explicit ARMTargetLowering(TargetMachine &TM); + virtual unsigned getJumpTableEncoding() const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -255,8 +269,19 @@ /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + /// getMaximalGlobalOffset - Returns the maximal possible offset which can + /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const; + + /// createFastISel - This method returns a target specific FastISel object, + /// or null if the target does not support "fast" ISel. + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -265,11 +290,17 @@ namespace llvm { /// materialize the FP immediate as a load from a constant pool. virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; + private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; + const TargetRegisterInfo *RegInfo; + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. /// unsigned ARMPCLabelIndex; @@ -310,14 +341,15 @@ namespace llvm { SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -377,6 +409,10 @@ namespace llvm { unsigned BinOpcode) const; }; + + namespace ARM { + FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + } } #endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index ac568e75ccc46..113cfffe61f94 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>; def LdStExFrm : Format<11>; def ArithMiscFrm : Format<12>; -def ExtFrm : Format<13>; - -def VFPUnaryFrm : Format<14>; -def VFPBinaryFrm : Format<15>; -def VFPConv1Frm : Format<16>; -def VFPConv2Frm : Format<17>; -def VFPConv3Frm : Format<18>; -def VFPConv4Frm : Format<19>; -def VFPConv5Frm : Format<20>; -def VFPLdStFrm : Format<21>; -def VFPLdStMulFrm : Format<22>; -def VFPMiscFrm : Format<23>; - -def ThumbFrm : Format<24>; -def MiscFrm : Format<25>; - -def NGetLnFrm : Format<26>; -def NSetLnFrm : Format<27>; -def NDupFrm : Format<28>; -def NLdStFrm : Format<29>; -def N1RegModImmFrm: Format<30>; -def N2RegFrm : Format<31>; -def NVCVTFrm : Format<32>; -def NVDupLnFrm : Format<33>; -def N2RegVShLFrm : Format<34>; -def N2RegVShRFrm : Format<35>; -def N3RegFrm : Format<36>; -def N3RegVShFrm : Format<37>; -def NVExtFrm : Format<38>; -def NVMulSLFrm : Format<39>; -def NVTBLFrm : Format<40>; +def SatFrm : Format<13>; +def ExtFrm : Format<14>; + +def VFPUnaryFrm : Format<15>; +def VFPBinaryFrm : Format<16>; +def VFPConv1Frm : Format<17>; 
+def VFPConv2Frm : Format<18>; +def VFPConv3Frm : Format<19>; +def VFPConv4Frm : Format<20>; +def VFPConv5Frm : Format<21>; +def VFPLdStFrm : Format<22>; +def VFPLdStMulFrm : Format<23>; +def VFPMiscFrm : Format<24>; + +def ThumbFrm : Format<25>; +def MiscFrm : Format<26>; + +def NGetLnFrm : Format<27>; +def NSetLnFrm : Format<28>; +def NDupFrm : Format<29>; +def NLdStFrm : Format<30>; +def N1RegModImmFrm: Format<31>; +def N2RegFrm : Format<32>; +def NVCVTFrm : Format<33>; +def NVDupLnFrm : Format<34>; +def N2RegVShLFrm : Format<35>; +def N2RegVShRFrm : Format<36>; +def N3RegFrm : Format<37>; +def N3RegVShFrm : Format<38>; +def NVExtFrm : Format<39>; +def NVMulSLFrm : Format<40>; +def NVTBLFrm : Format<41>; // Misc flags. @@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; } class AddrMode<bits<4> val> { bits<4> Value = val; } -def AddrModeNone : AddrMode<0>; -def AddrMode1 : AddrMode<1>; -def AddrMode2 : AddrMode<2>; -def AddrMode3 : AddrMode<3>; -def AddrMode4 : AddrMode<4>; -def AddrMode5 : AddrMode<5>; -def AddrMode6 : AddrMode<6>; -def AddrModeT1_1 : AddrMode<7>; -def AddrModeT1_2 : AddrMode<8>; -def AddrModeT1_4 : AddrMode<9>; -def AddrModeT1_s : AddrMode<10>; -def AddrModeT2_i12: AddrMode<11>; -def AddrModeT2_i8 : AddrMode<12>; -def AddrModeT2_so : AddrMode<13>; -def AddrModeT2_pc : AddrMode<14>; +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrMode6 : AddrMode<6>; +def AddrModeT1_1 : AddrMode<7>; +def AddrModeT1_2 : AddrMode<8>; +def AddrModeT1_4 : AddrMode<9>; +def AddrModeT1_s : AddrMode<10>; +def AddrModeT2_i12 : AddrMode<11>; +def AddrModeT2_i8 : AddrMode<12>; +def AddrModeT2_so : AddrMode<13>; +def AddrModeT2_pc : AddrMode<14>; def AddrModeT2_i8s4 : AddrMode<15>; // Instruction size. @@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains // ARM special operands. // +def CondCodeOperand : AsmOperandClass { + let Name = "CondCode"; + let SuperClasses = []; +} + // ARM Predicate operand. Default to 14 = always (AL). Second part is CC // register whose default is 0 (no register). def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), (ops (i32 14), (i32 zero_reg))> { let PrintMethod = "printPredicateOperand"; + let ParserMatchClass = CondCodeOperand; } // Conditional code result for instructions whose 's' bit is set, e.g. subs. @@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } + // A few are not predicable class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, @@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsARM]; } -// Same as I except it can optionally modify CPSR. Note it's modeled as -// an input operand since by default it's a zero register. It will -// become an implicit def once it's "flipped". +// Same as I except it can optionally modify CPSR. Note it's modeled as an input +// operand since by default it's a zero register. It will become an implicit def +// once it's "flipped". 
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, @@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, } class ABXIx2<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin, + : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin, asm, "", pattern>; // BR_JT instructions @@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin, : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; - // Atomic load/store instructions - class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; - let Inst{20} = 1; + let Inst{20} = 1; let Inst{11-0} = 0b111110011111; } class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, @@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, opc, asm, "", pattern> { let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; - let Inst{20} = 0; + let Inst{20} = 0; let Inst{11-4} = 0b11111001; } @@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern> { let Inst{24-21} = opcod; - let Inst{27-26} = {0,0}; + let Inst{27-26} = 0b00; } class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // loads @@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -419,7 +425,7 @@ class 
AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // stores @@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> @@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Pre-indexed loads @@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Pre-indexed stores @@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Post-indexed loads @@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // Post-indexed stores @@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -549,7 +555,7 @@ class AI2stbpo<dag 
oops, dag iops, Format f, InstrItinClass itin, let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 0; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-26} = 0b01; } // addrmode3 instructions @@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3, Encoding { let Inst{31-27} = opcod1; let Inst{15-14} = opcod2; - let Inst{12} = opcod3; + let Inst{12} = opcod3; } // BR_JT instructions @@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 { // A6.2.4 Load/store single data item encoding. class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 { let Inst{15-12} = opA; - let Inst{11-9} = opB; + let Inst{11-9} = opB; } -class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>; +class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>; class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes -class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative +class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative // A6.2.5 Miscellaneous 16-bit instructions encoding. class T1Misc<bits<7> opcode> : Encoding16 { @@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb2]; } -// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as -// an input operand since by default it's a zero register. It will -// become an implicit def once it's "flipped". +// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an +// input operand since by default it's a zero register. It will become an +// implicit def once it's "flipped". +// // FIXME: This uses unified syntax so {s} comes before {p}. We should make it // more consistent. class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin, pattern> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; - let Inst{24} = P; - let Inst{23} = ?; // The U bit. - let Inst{22} = 1; - let Inst{21} = W; - let Inst{20} = load; + let Inst{24} = P; + let Inst{23} = ?; // The U bit. + let Inst{22} = 1; + let Inst{21} = W; + let Inst{20} = load; } class T2sI<dag oops, dag iops, InstrItinClass itin, @@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, list<Predicate> Predicates = [IsThumb2]; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; - let Inst{24} = signed; - let Inst{23} = 0; + let Inst{24} = signed; + let Inst{23} = 0; let Inst{22-21} = opcod; - let Inst{20} = load; - let Inst{11} = 1; + let Inst{20} = load; + let Inst{11} = 1; // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed - let Inst{10} = pre; // The P bit. - let Inst{8} = 1; // The W bit. + let Inst{10} = pre; // The P bit. + let Inst{8} = 1; // The W bit. } // Helper class for disassembly only @@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops, : T2I<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b011; - let Inst{23} = long; + let Inst{23} = long; let Inst{22-20} = op22_20; - let Inst{7-4} = op7_4; + let Inst{7-4} = op7_4; } // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. 
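The W/B/P bit comments running through the AI2* load/store classes above follow one fixed scheme; here is a small hypothetical sketch (plain C++, not part of the patch) of how those addrmode2 control bits compose: bits 27-26 are always 0b01, P (bit 24) is set for offset and pre-indexed forms and clear for post-indexed ones, W (bit 21) requests base writeback, and B (bit 22) selects a byte rather than a word transfer.

#include <cassert>
#include <cstdint>

// Packs only the addrmode2 bits that the AI2* classes above pin down; the
// remaining fields (registers, offset, condition) are filled in elsewhere.
static uint32_t addrMode2Bits(bool P, bool W, bool B) {
  uint32_t Inst = 1u << 26;            // let Inst{27-26} = 0b01;
  if (P) Inst |= 1u << 24;             // P bit: offset / pre-indexed addressing
  if (B) Inst |= 1u << 22;             // B bit: byte access
  if (W) Inst |= 1u << 21;             // W bit: base writeback
  return Inst;
}

int main() {
  uint32_t Offset = addrMode2Bits(true, false, false);   // AI2ldw / AI2stw
  uint32_t Pre    = addrMode2Bits(true, true, false);    // AI2ldwpr / AI2stwpr
  uint32_t Post   = addrMode2Bits(false, false, false);  // AI2ldwpo / AI2stwpo
  assert((Offset >> 26) == 1);                           // Inst{27-26} == 0b01
  assert((Pre & (1u << 21)) && !(Post & (1u << 24)));
  return 0;
}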
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, } // Load / store multiple -class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, +class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; @@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, let D = VFPNeonDomain; } -class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin, +class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : VFPXI<oops, iops, AddrMode5, Size4Bytes, im, + : VFPXI<oops, iops, AddrMode4, Size4Bytes, im, VFPLdStMulFrm, itin, asm, cstr, pattern> { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; @@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; } // Double precision, binary, VML[AS] (for additional predicate) @@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; list<Predicate> Predicates = [HasVFP2, UseVMLx]; } - // Single precision, unary class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, @@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{27-23} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1010; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{6} = op6; + let Inst{4} = op4; } // Single precision binary, if no NEON @@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{31-24} = 0b11110100; - let Inst{23} = op23; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{7-4} = op7_4; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; +} + +class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr> + : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr, + itin> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + list<Predicate> Predicates = [HasNEON]; } class NDataI<dag oops, dag iops, Format f, InstrItinClass itin, @@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> { - let Inst{23} = op23; + let Inst{23} = op23; let Inst{21-19} = op21_19; - let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{5} = op5; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{5} = op5; + let Inst{4} = op4; } // NEON 2 vector register format. 
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; } // Same as N2V except it doesn't have a datatype suffix. @@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; - let Inst{11-7} = op11_7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON 2 vector register with immediate. @@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{11-8} = op11_8; - let Inst{7} = op7; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON 3 vector register format. @@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; } // Same as N3V except it doesn't have a data type suffix. @@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> { - let Inst{24} = op24; - let Inst{23} = op23; + let Inst{24} = op24; + let Inst{23} = op23; let Inst{21-20} = op21_20; - let Inst{11-8} = op11_8; - let Inst{6} = op6; - let Inst{4} = op4; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; } // NEON VMOVs between scalar and core registers. 
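Before moving on to the scalar/core-register forms, note that the N3V field placement above is mechanical enough to render in plain C++; the sketch below is illustration only (hypothetical helper, not part of the patch), packing just the bits the TableGen class pins down.

#include <cstdint>

// Mirrors the 'let Inst{...}' assignments of class N3V; the fixed NEON
// encoding prefix comes from the parent NDataI class and is omitted here.
static uint32_t packN3V(unsigned op24, unsigned op23, unsigned op21_20,
                        unsigned op11_8, unsigned op6, unsigned op4) {
  return (op24 << 24) | (op23 << 23) | (op21_20 << 20) |
         (op11_8 << 8) | (op6 << 6) | (op4 << 4);
}

int main() {
  // e.g. a 3-register form with op21_20 = 0b10 and op11_8 = 0b1000.
  return packN3V(0, 0, 2, 8, 1, 0) ==
                 ((2u << 20) | (8u << 8) | (1u << 6)) ? 0 : 1;
}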
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain, "", itin> { let Inst{27-20} = opcod1; - let Inst{11-8} = opcod2; - let Inst{6-5} = opcod3; - let Inst{4} = 1; + let Inst{11-8} = opcod2; + let Inst{6-5} = opcod3; + let Inst{4} = 1; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); @@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, let Inst{24-23} = 0b11; let Inst{21-20} = 0b11; let Inst{19-16} = op19_16; - let Inst{11-7} = 0b11000; - let Inst{6} = op6; - let Inst{4} = 0; + let Inst{11-7} = 0b11000; + let Inst{6} = op6; + let Inst{4} = 0; } // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 51fc1522485fa..e66f9b9ad0ac5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, SDTCisVT<3, i32>, SDTCisVT<4, i32>, SDTCisVT<5, OtherVT>]>; +def SDT_ARMAnd : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; -def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + // Node definitions. 
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>; +def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd, + [SDNPOutFlag]>; + def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, - [SDNPOutFlag,SDNPCommutative]>; + [SDNPOutFlag, SDNPCommutative]>; def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; @@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; -def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, - [SDNPHasChain]>; -def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; -def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, - [SDNPHasChain]>; +def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; +def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR, + [SDNPHasChain]>; def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + +def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// -def HasV4T : Predicate<"Subtarget->hasV4TOps()">; -def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; -def HasV5T : Predicate<"Subtarget->hasV5TOps()">; -def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; -def HasV6 : Predicate<"Subtarget->hasV6Ops()">; -def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; -def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; -def HasV7 : Predicate<"Subtarget->hasV7Ops()">; -def NoVFP : Predicate<"!Subtarget->hasVFP2()">; -def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; -def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; -def HasNEON : Predicate<"Subtarget->hasNEON()">; -def HasDivide : Predicate<"Subtarget->hasDivide()">; +def HasV4T : Predicate<"Subtarget->hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; +def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; +def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; +def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; +def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; +def HasNEON : Predicate<"Subtarget->hasNEON()">; +def HasDivide : Predicate<"Subtarget->hasDivide()">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; -def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; +def HasDB : Predicate<"Subtarget->hasDataBarrier()">; +def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; -def IsThumb : Predicate<"Subtarget->isThumb()">; -def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; -def IsThumb2 : Predicate<"Subtarget->isThumb2()">; -def IsARM : Predicate<"!Subtarget->isThumb()">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; +def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; +def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; // FIXME: Eventually this will be just "hasV6T2Ops". -def UseMovt : Predicate<"Subtarget->useMovt()">; -def DontUseMovt : Predicate<"!Subtarget->useMovt()">; - -def UseVMLx : Predicate<"Subtarget->useVMLx()">; +def UseMovt : Predicate<"Subtarget->useMovt()">; +def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseVMLx : Predicate<"Subtarget->useVMLx()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ - uint32_t v = (uint32_t)N->getZExtValue(); - if (v == 0xffffffff) - return 0; - // there can be 1's on either or both "outsides", all the "inside" - // bits must be 0's - unsigned int lsb = 0, msb = 31; - while (v & (1 << msb)) --msb; - while (v & (1 << lsb)) ++lsb; - for (unsigned int i = lsb; i <= msb; ++i) { - if (v & (1 << i)) - return 0; - } - return 1; + return ARM::isBitFieldInvertedMask(N->getZExtValue()); }] > { let PrintMethod = "printBitfieldInvMaskImmOperand"; } /// Split a 32-bit immediate into two 16 bit parts. 
-def lo16 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff, - MVT::i32); -}]>; - def hi16 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32); }]>; @@ -306,6 +302,13 @@ def pclabel : Operand<i32> { let PrintMethod = "printPCLabel"; } +// shift_imm: An integer that encodes a shift amount and the type of shift +// (currently either asr or lsl) using the same encoding used for the +// immediates in so_reg operands. +def shift_imm : Operand<i32> { + let PrintMethod = "printShiftImmOperand"; +} + // shifter_operand operands: so_reg and so_imm. def so_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectShifterOperandReg", @@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm // represented in the imm field in the same 12-bit form that they are encoded // into so_imm instructions: the 8-bit immediate is the least significant bits // [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. -def so_imm : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; - }]> { +def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { let PrintMethod = "printSOImmOperand"; } @@ -452,11 +452,15 @@ include "ARMInstrFormats.td" /// binop that produces a value. multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { let Inst{25} = 1; } + } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { @@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to AsI1_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. 
-let Defs = [CPSR] in { +let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi, @@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in { let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target \n$jt", + IIC_Br, "mov\tpc, $target$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { let Inst{11-4} = 0b00000000; let Inst{15-12} = 0b1111; @@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTm : JTI<(outs), (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "ldr\tpc, $target \n$jt", + IIC_Br, "ldr\tpc, $target$jt", [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]> { let Inst{15-12} = 0b1111; @@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTadd : JTI<(outs), (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "add\tpc, $target, $idx \n$jt", + IIC_Br, "add\tpc, $target, $idx$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { let Inst{15-12} = 0b1111; @@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111, defm UXTB16 : AI_unary_rrot<0b01101100, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (UXTB16r_rot GPR:$Src, 24)>; +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. +//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), +// (UXTB16r_rot GPR:$Src, 24)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16r_rot GPR:$Src, 8)>; @@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; -// These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "\t$dst, $a, $b", - [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { + IIC_iALUi, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { let Inst{25} = 1; } +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. +def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + IIC_iALUr, "rsb", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{25} = 0; + let Inst{11-4} = 0b00000000; +} + def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "\t$dst, $a, $b", - [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { + IIC_iALUsr, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { let Inst{25} = 0; } @@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), Requires<[IsARM]> { let Inst{25} = 1; } +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. 
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{25} = 0; + let Inst{11-4} = 0b00000000; +} def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, @@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm), // ARM Arithmetic Instruction -- for disassembly only // GPR:$dst = GPR:$a op GPR:$b -class AAI<bits<8> op27_20, bits<4> op7_4, string opc> +class AAI<bits<8> op27_20, bits<4> op7_4, string opc, + list<dag> pattern = [/* For disassembly only; pattern left blank */]> : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, - opc, "\t$dst, $a, $b", - [/* For disassembly only; pattern left blank */]> { + opc, "\t$dst, $a, $b", pattern> { let Inst{27-20} = op27_20; let Inst{7-4} = op7_4; } // Saturating add/subtract -- for disassembly only -def QADD : AAI<0b00010000, 0b0101, "qadd">; +def QADD : AAI<0b00010000, 0b0101, "qadd", + [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>; def QADD16 : AAI<0b01100010, 0b0001, "qadd16">; def QADD8 : AAI<0b01100010, 0b1001, "qadd8">; def QASX : AAI<0b01100010, 0b0011, "qasx">; def QDADD : AAI<0b00010100, 0b0101, "qdadd">; def QDSUB : AAI<0b00010110, 0b0101, "qdsub">; def QSAX : AAI<0b01100010, 0b0101, "qsax">; -def QSUB : AAI<0b00010010, 0b0101, "qsub">; +def QSUB : AAI<0b00010010, 0b0101, "qsub", + [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>; def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">; def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">; def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">; @@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), // Signed/Unsigned saturate -- for disassembly only -def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-21} = 0b0110101; - let Inst{6-4} = 0b001; -} - -def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{27-21} = 0b0110101; - let Inst{6-4} = 0b101; + let Inst{5-4} = 0b01; } -def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, +def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm, NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{27-20} = 0b01101010; let Inst{7-4} = 0b0011; } -def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-21} = 0b0110111; - let Inst{6-4} = 0b001; -} - -def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), - DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{27-21} = 
0b0110111; - let Inst{6-4} = 0b101; + let Inst{5-4} = 0b01; } -def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, +def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm, NoItinerary, "usat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{27-20} = 0b01101110; let Inst{7-4} = 0b0011; } +def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; + //===----------------------------------------------------------------------===// // Bitwise Instructions. // defm AND : AsI1_bin_irs<0b0000, "and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; +defm ANDS : AI1_bin_s_irs<0b0000, "and", + BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", @@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), } // A8.6.18 BFI - Bitfield insert (Encoding A1) -// Added for disassembler with the pattern field purposely left blank. -def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), +def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm), AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, - "bfi", "\t$dst, $src, $imm", "", - [/* For disassembly only; pattern left blank */]>, + "bfi", "\t$dst, $val, $imm", "$src = $dst", + [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val, + bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111110; let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 @@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, let Inst{19-16} = 0b1111; } +def lsl_shift_imm : SDNodeXForm<imm, [{ + unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); + return CurDAG->getTargetConstant(Sh, MVT::i32); +}]>; + +def lsl_amt : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() < 32); +}], lsl_shift_imm>; + def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), - (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", + (ins GPR:$src1, GPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), - (and (shl GPR:$src2, (i32 imm:$shamt)), + (and (shl GPR:$src2, lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[IsARM, HasV6]> { let Inst{6-4} = 0b001; @@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), // Alternate cases for PKHBT where identities eliminate some nodes. def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), (PKHBT GPR:$src1, GPR:$src2, 0)>; -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)), + (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>; + +def asr_shift_imm : SDNodeXForm<imm, [{ + unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue()); + return CurDAG->getTargetConstant(Sh, MVT::i32); +}]>; +def asr_amt : PatLeaf<(i32 imm), [{ + return (N->getZExtValue() <= 32); +}], asr_shift_imm>; +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. 
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), - (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", + (ins GPR:$src1, GPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), - (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>, Requires<[IsARM, HasV6]> { + (and (sra GPR:$src2, asr_amt:$sh), + 0xFFFF)))]>, + Requires<[IsARM, HasV6]> { let Inst{6-4} = 0b101; } // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)), + (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>; def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), defm CMP : AI1_cmp_irs<0b1010, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -//FIXME: Disable CMN, as CCodes are backwards from compare expectations -// Compare-to-zero still works out, just not the relationals + +// FIXME: There seems to be a (potential) hardware bug with the CMN instruction +// and comparison with 0. These two pieces of code should give identical +// results: +// +// rsbs r1, r1, 0 +// cmp r0, r1 +// mov r0, #0 +// it ls +// mov r0, #1 +// +// and: +// +// cmn r0, r1 +// mov r0, #0 +// it ls +// mov r0, #1 +// +// However, the CMN gives the *opposite* result when r1 is 0. This is because +// the carry flag is set in the CMP case but not in the CMN case. In short, the +// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the +// value of r0 and the carry bit (because the "carry bit" parameter to +// AddWithCarry is defined as 1 in this case, the carry flag will always be set +// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is +// never a "carry" when this AddWithCarry is performed (because the "carry bit" +// parameter to AddWithCarry is defined as 0). +// +// The AddWithCarry in the CMP case seems to be relying upon the identity: +// +// ~x + 1 = -x +// +// However when x is 0 and unsigned, this doesn't hold: +// +// x = 0 +// ~x = 0xFFFF FFFF +// ~x + 1 = 0x1 0000 0000 +// (-x = 0) != (0x1 0000 0000 = ~x + 1) +// +// Therefore, we should disable *all* versions of CMN, especially when comparing +// against zero, until we can limit when the CMN instruction is used (when we +// know that the RHS is not 0) or when we have a hardware fix for this. +// +// (See the ARM docs for the "AddWithCarry" pseudo-code.) +// +// This is related to <rdar://problem/7569620>. 
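The comment above traces the CMP/CMN disagreement to the carry-out of AddWithCarry. A minimal C++ model of the ARM ARM pseudo-code (the struct and function names here are illustrative, not LLVM or ARM APIs) shows the flag difference directly:

#include <cstdint>
#include <cstdio>

// AddWithCarry from the ARM ARM pseudo-code: 32-bit result plus carry-out.
struct AWCResult { uint32_t Result; bool Carry; };

static AWCResult AddWithCarry(uint32_t X, uint32_t Y, bool CarryIn) {
  uint64_t Sum = (uint64_t)X + (uint64_t)Y + (CarryIn ? 1 : 0);
  return { (uint32_t)Sum, Sum > 0xFFFFFFFFULL };
}

int main() {
  uint32_t R0 = 5, R1 = 0;
  AWCResult Cmp = AddWithCarry(R0, ~R1, true);  // CMP r0, r1 = r0 + ~r1 + 1
  AWCResult Cmn = AddWithCarry(R0, R1, false);  // CMN r0, r1 = r0 + r1 + 0
  // With R1 == 0, ~R1 + 1 wraps to 2^32, so CMP always produces a carry
  // while CMN never does; C-dependent conditions (LS, HI, ...) then flip.
  printf("CMP C=%d, CMN C=%d\n", Cmp.Carry, Cmn.Carry);
  return 0;
}

This prints CMP C=1, CMN C=0, matching the "opposite result" described in the comment.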
+// //defm CMN : AI1_cmp_irs<0b1011, "cmn", // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; @@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, Defs = [CPSR] in { def BCCi64 : PseudoInst<(outs), - (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), - IIC_Br, + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc", [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; @@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def Int_MemBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_SyncBarrierV7 : AInoP<(outs), (ins), - Pseudo, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsARM, HasV7]> { +def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } -def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierV6 GPR:$zero)]>, + [(ARMMemBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DMB // FIXME: add encoding } -def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), - Pseudo, NoItinerary, +def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, "mcr", "\tp15, 0, $zero, c7, c10, 4", - [(ARMSyncBarrierV6 GPR:$zero)]>, + [(ARMSyncBarrierMCR GPR:$zero)]>, Requires<[IsARM, HasV6]> { // FIXME: add support for options other than a full system DSB // FIXME: add encoding } } -// Helper class for multiclass MemB -- for disassembly only -class AMBI<string opc, string asm> - : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm, - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV7]> { - let Inst{31-20} = 0xf57; -} - -multiclass MemB<bits<4> op7_4, string opc> { - - def st : AMBI<opc, "\tst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1110; - } - - def ish : AMBI<opc, "\tish"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1011; - } - - def ishst : AMBI<opc, "\tishst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1010; - } - - def nsh : AMBI<opc, "\tnsh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0111; - } - - def nshst : AMBI<opc, "\tnshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0110; - } +// Memory Barrier Operations Variants -- for disassembly only - def osh : AMBI<opc, "\tosh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0011; - } +def memb_opt : Operand<i32> { + let PrintMethod = "printMemBOption"; +} - def oshst : AMBI<opc, "\toshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0010; - } +class AMBI<bits<4> op7_4, string opc> + : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt", + [/* For disassembly only; pattern 
left blank */]>, + Requires<[IsARM, HasDB]> { + let Inst{31-8} = 0xf57ff0; + let Inst{7-4} = op7_4; } // These DMB variants are for disassembly only. -defm DMB : MemB<0b0101, "dmb">; +def DMBvar : AMBI<0b0101, "dmb">; // These DSB variants are for disassembly only. -defm DSB : MemB<0b0100, "dsb">; +def DSBvar : AMBI<0b0100, "dsb">; // ISB has only full system option -- for disassembly only -def ISBsy : AMBI<"isb", ""> { - let Inst{7-4} = 0b0110; +def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>, + Requires<[IsARM, HasDB]> { + let Inst{31-4} = 0xf57ff06; let Inst{3-0} = 0b1111; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7f7eb980abe83..4d2f1169061ff 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; +def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; + def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; @@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits; + unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); - unsigned EltBits; + unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; @@ -124,15 +129,16 @@ def nModImm : Operand<i32> { // NEON load / store instructions //===----------------------------------------------------------------------===// -let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. // This is equivalent to VLDMD except that it has a Q register operand // instead of a pair of D registers. def VLDMQ - : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), + : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; + "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "", + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>; +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vld1 to load a Q register as a D register pair. // This alternative to VLDMQ allows an alignment to be specified. // This is equivalent to VLD1q64 except that it has a Q register operand. @@ -141,15 +147,16 @@ def VLD1q IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; } // mayLoad = 1, neverHasSideEffects = 1 -let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. // This is equivalent to VSTMD except that it has a Q register operand // instead of a pair of D registers. 
def VSTMQ - : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), + : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; + "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "", + [(store (v2f64 QPR:$src), addrmode4:$addr)]>; +let mayStore = 1, neverHasSideEffects = 1 in { // Use vst1 to store a Q register as a D register pair. // This alternative to VSTMQ allows an alignment to be specified. // This is equivalent to VST1q64 except that it has a Q register operand. @@ -160,6 +167,25 @@ def VST1q let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +// Classes for VLD* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VLDQPseudo + : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; +class VLDQWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + "$addr.addr = $wb">; +class VLDQQPseudo + : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; +class VLDQQWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + "$addr.addr = $wb">; +class VLDQQQQWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb, $src = $dst">; + // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), @@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">; def VLD1q32 : VLD1Q<0b1000, "32">; def VLD1q64 : VLD1Q<0b1100, "64">; +def VLD1q8Pseudo : VLDQPseudo; +def VLD1q16Pseudo : VLDQPseudo; +def VLD1q32Pseudo : VLDQPseudo; +def VLD1q64Pseudo : VLDQPseudo; + // ...with address register writeback: class VLD1DWB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), @@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">; def VLD1q32_UPD : VLD1QWB<0b1000, "32">; def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +def VLD1q8Pseudo_UPD : VLDQWBPseudo; +def VLD1q16Pseudo_UPD : VLDQWBPseudo; +def VLD1q32Pseudo_UPD : VLDQWBPseudo; +def VLD1q64Pseudo_UPD : VLDQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VLD1D3<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), @@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; +def VLD1d64TPseudo : VLDQQPseudo; +def VLD1d64TPseudo_UPD : VLDQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VLD1D4<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), @@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; +def VLD1d64QPseudo : VLDQQPseudo; +def VLD1d64QPseudo_UPD : VLDQQWBPseudo; + // VLD2 : Vector Load (multiple 2-element structures) class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), @@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">; def VLD2q16 : VLD2Q<0b0100, "16">; def VLD2q32 : VLD2Q<0b1000, "32">; +def VLD2d8Pseudo : VLDQPseudo; +def VLD2d16Pseudo : VLDQPseudo; +def VLD2d32Pseudo : VLDQPseudo; + +def VLD2q8Pseudo : VLDQQPseudo; +def VLD2q16Pseudo : 
VLDQQPseudo; +def VLD2q32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), @@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">; def VLD2q16_UPD : VLD2QWB<0b0100, "16">; def VLD2q32_UPD : VLD2QWB<0b1000, "32">; +def VLD2d8Pseudo_UPD : VLDQWBPseudo; +def VLD2d16Pseudo_UPD : VLDQWBPseudo; +def VLD2d32Pseudo_UPD : VLDQWBPseudo; + +def VLD2q8Pseudo_UPD : VLDQQWBPseudo; +def VLD2q16Pseudo_UPD : VLDQQWBPseudo; +def VLD2q32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; @@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; +def VLD3d8Pseudo : VLDQQPseudo; +def VLD3d16Pseudo : VLDQQPseudo; +def VLD3d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; @@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; -def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; -def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; +def VLD4d8Pseudo : VLDQQPseudo; +def VLD4d16Pseudo : VLDQQPseudo; +def VLD4d32Pseudo : VLDQQPseudo; + // ...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, @@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; @@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VLD4q8odd_UPD : VLD4DWB<0b0001, 
0b0000, "8">; -def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; -def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. @@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +// Classes for VST* pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VSTQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">; +class VSTQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "$addr.addr = $wb">; +class VSTQQPseudo + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">; +class VSTQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST, + "$addr.addr = $wb">; +class VSTQQQQWBPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + "$addr.addr = $wb">; + // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, @@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">; def VST1q32 : VST1Q<0b1000, "32">; def VST1q64 : VST1Q<0b1100, "64">; +def VST1q8Pseudo : VSTQPseudo; +def VST1q16Pseudo : VSTQPseudo; +def VST1q32Pseudo : VSTQPseudo; +def VST1q64Pseudo : VSTQPseudo; + // ...with address register writeback: class VST1DWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), @@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">; def VST1q32_UPD : VST1QWB<0b1000, "32">; def VST1q64_UPD : VST1QWB<0b1100, "64">; +def VST1q8Pseudo_UPD : VSTQWBPseudo; +def VST1q16Pseudo_UPD : VSTQWBPseudo; +def VST1q32Pseudo_UPD : VSTQWBPseudo; +def VST1q64Pseudo_UPD : VSTQWBPseudo; + // ...with 3 registers (some of these are only for the disassembler): class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">; def VST1d32T_UPD : VST1D3WB<0b1000, "32">; def VST1d64T_UPD : VST1D3WB<0b1100, "64">; +def VST1d64TPseudo : VSTQQPseudo; +def VST1d64TPseudo_UPD : VSTQQWBPseudo; + // ...with 4 registers (some of these are only for the disassembler): class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), @@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; +def VST1d64QPseudo : VSTQQPseudo; +def VST1d64QPseudo_UPD : VSTQQWBPseudo; + // VST2 : Vector Store (multiple 2-element structures) class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), @@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">; def VST2q16 : VST2Q<0b0100, "16">; def VST2q32 : VST2Q<0b1000, "32">; +def VST2d8Pseudo : VSTQPseudo; +def VST2d16Pseudo : VSTQPseudo; +def VST2d32Pseudo : VSTQPseudo; + +def VST2q8Pseudo : VSTQQPseudo; +def VST2q16Pseudo : VSTQQPseudo; +def VST2q32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">; def VST2q16_UPD : VST2QWB<0b0100, 
"16">; def VST2q32_UPD : VST2QWB<0b1000, "32">; +def VST2d8Pseudo_UPD : VSTQWBPseudo; +def VST2d16Pseudo_UPD : VSTQWBPseudo; +def VST2d32Pseudo_UPD : VSTQWBPseudo; + +def VST2q8Pseudo_UPD : VSTQQWBPseudo; +def VST2q16Pseudo_UPD : VSTQQWBPseudo; +def VST2q32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (for disassembly only): def VST2b8 : VST2D<0b1001, 0b0000, "8">; def VST2b16 : VST2D<0b1001, 0b0100, "16">; @@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">; def VST3d16 : VST3D<0b0100, 0b0100, "16">; def VST3d32 : VST3D<0b0100, 0b1000, "32">; +def VST3d8Pseudo : VSTQQPseudo; +def VST3d16Pseudo : VSTQQPseudo; +def VST3d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d8Pseudo_UPD : VSTQQWBPseudo; +def VST3d16Pseudo_UPD : VSTQQWBPseudo; +def VST3d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST3q8 : VST3D<0b0101, 0b0000, "8">; def VST3q16 : VST3D<0b0101, 0b0100, "16">; @@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; -def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; -def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST4 : Vector Store (multiple 4-element structures) class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">; def VST4d16 : VST4D<0b0000, 0b0100, "16">; def VST4d32 : VST4D<0b0000, 0b1000, "32">; +def VST4d8Pseudo : VSTQQPseudo; +def VST4d16Pseudo : VSTQQPseudo; +def VST4d32Pseudo : VSTQQPseudo; + // ...with address register writeback: class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), @@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d8Pseudo_UPD : VSTQQWBPseudo; +def VST4d16Pseudo_UPD : VSTQQWBPseudo; +def VST4d32Pseudo_UPD : VSTQQWBPseudo; + // ...with double-spaced registers (non-updating versions for disassembly only): def VST4q8 : VST4D<0b0001, 0b0000, "8">; def VST4q16 : VST4D<0b0001, 0b0100, "16">; @@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; + // ...alternate versions to be allocated odd register numbers: -def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; -def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; -def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST1LN : Vector Store 
(single element from one lane) // FIXME: Not yet implemented. @@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; +// Narrow 2-register operations. +class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyD, ValueType TyQ, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst), + (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", + [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>; + // Narrow 2-register intrinsics. class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; -// Long 2-register intrinsics (currently only used for VMOVL). -class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> +// Long 2-register operations (currently only used for VMOVL). +class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst), (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>; + [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>; // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> @@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))))]>; +// Neon Intrinsic-Op instructions (VABA): double- and quad-register. +class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set DPR:$dst, (Ty (OpNode DPR:$src1, + (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>; +class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (Ty (OpNode QPR:$src1, + (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>; + // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2), (OpTy QPR:$src3))))]>; +// Long Multiply-Add/Sub operations. 
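As a reader's aid: the classes that follow match an add or subtract wrapped around a widening multiply, e.g. a vmlal.s16 selects as add(QPR, NEONvmulls(DPR, DPR)). A rough per-lane C++ model of what the v4i32/v4i16 instance computes (the function name is illustrative):

#include <cstdint>

// Per-lane model of VMLAL.S16: Acc[i] += sext32(A[i]) * sext32(B[i]).
// A 4-lane loop stands in for the v4i32/v4i16 vector types in the pattern.
static void vmlal_s16(int32_t Acc[4], const int16_t A[4], const int16_t B[4]) {
  for (int i = 0; i < 4; ++i)
    Acc[i] += (int32_t)A[i] * (int32_t)B[i];
}

The N3VLMulOpSL/N3VLMulOpSL16 variants below compute the same thing with the second multiplicand broadcast from a single lane (NEONvduplane).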
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD DPR:$src3)))))]>; +class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), + (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", + [(set QPR:$dst, + (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD (NEONvduplane (TyD DPR_VFP2:$src3), + imm:$lane))))))]>; +class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), + (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), + NVMulSLFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", + [(set QPR:$dst, + (OpNode (TyQ QPR:$src1), + (TyQ (MulOp (TyD DPR:$src2), + (TyD (NEONvduplane (TyD DPR_8:$src3), + imm:$lane))))))]>; + +// Long Intrinsic-Op vector operations with explicit extend (VABAL). +class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2), + (TyD DPR:$src3)))))))]>; + // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } +// Long 3-register operations. 
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> { + let isCommutable = Commutable; +} +class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, + (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + [(set QPR:$dst, + (TyQ (OpNode (TyD DPR:$src1), + (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>; +class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode> + : N3V<op24, 1, op21_20, op11_8, 1, 0, + (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + [(set QPR:$dst, + (TyQ (OpNode (TyD DPR:$src1), + (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>; + +// Long 3-register operations with explicitly extended operands. +class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { + let isCommutable = Commutable; +} + +// Long 3-register intrinsics with explicit extend (VABDL). +class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1), + (TyD DPR:$src2))))))]> { + let isCommutable = Commutable; +} + // Long 3-register intrinsics. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, (OpTy (NEONvduplane (OpTy DPR_8:$src2), imm:$lane)))))]>; -// Wide 3-register intrinsics. -class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, - Intrinsic IntOp, bit Commutable> +// Wide 3-register operations. 
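For orientation: the "long" classes above widen both 64-bit operands before the operation, while the "wide" class below keeps the first operand at full width and widens only the second. In per-lane C++ terms (names illustrative):

#include <cstdint>

// Long, e.g. VADDL.U8:  Q[i] = zext16(D1[i]) + zext16(D2[i])
static void vaddl_u8(uint16_t Q[8], const uint8_t D1[8], const uint8_t D2[8]) {
  for (int i = 0; i < 8; ++i)
    Q[i] = (uint16_t)D1[i] + (uint16_t)D2[i];
}

// Wide, e.g. VADDW.U8:  Q[i] = Q1[i] + zext16(D2[i])
static void vaddw_u8(uint16_t Q[8], const uint16_t Q1[8], const uint8_t D2[8]) {
  for (int i = 0; i < 8; ++i)
    Q[i] = Q1[i] + (uint16_t)D2[i];
}

This mirrors the (OpNode (ExtOp $src1), (ExtOp $src2)) versus (OpNode $src1, (ExtOp $src2)) distinction in the N3VLExt and N3VW patterns.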
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, + SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { let isCommutable = Commutable; } @@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Narrowing 2-register vector operations, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, OpNode>; + def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, OpNode>; + def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, OpNode>; +} + // Neon Narrowing 2-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, @@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: -multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { - def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; - def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; +multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, string Dt, SDNode OpNode> { + def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; + def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; } @@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long 3-register vector operations. 
+ +multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { + def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, Commutable>; + def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, Commutable>; + def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, Commutable>; +} + +multiclass N3VLSL_HS<bit op24, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr, + !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; +} + +multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; +} + // Neon Long 3-register vector intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v8i16, v8i8, IntOp, Commutable>; } +// ....with explicit extend (VABDL). 
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, IntOp, ExtOp, Commutable>; + def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, IntOp, ExtOp, Commutable>; + def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, IntOp, ExtOp, Commutable>; +} + // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: -multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { - def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, - OpcodeStr, !strconcat(Dt, "8"), - v8i16, v8i8, IntOp, Commutable>; - def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, - OpcodeStr, !strconcat(Dt, "16"), - v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, - OpcodeStr, !strconcat(Dt, "32"), - v2i64, v2i32, IntOp, Commutable>; +multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; } @@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, mul, ShOp>; } +// Neon Intrinsic-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, Intrinsic IntOp, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>; + def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>; + def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD, + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>; + + // 128-bit vector types. 
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>; + def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>; + def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ, + OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>; +} + // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, @@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDNode MulOp, + SDNode OpNode> { + def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, + !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>; + def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, + !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>; + def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; +} + +multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr, + string Dt, SDNode MulOp, SDNode OpNode> { + def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr, + !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>; + def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr, + !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>; +} + + // Neon Long 3-argument intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } +// ....with explicit extend (VABAL). 
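
The multiply-op multiclasses above express VMLAL/VMLSL as an add or sub wrapped around the NEONvmulls/NEONvmullu widening-multiply node, and the IntExtOp variant below does the same for VABAL with an explicit zext of the vabd intrinsic. A per-lane sketch (illustrative names; wrapping arithmetic as in the vector ops):

#include <cstdint>

// VMLAL.S16: Qd = add(Qd, vmull.s16(Dn, Dm)) per lane, modulo 2^32.
int32_t vmlal_s16_lane(int32_t acc, int16_t a, int16_t b) {
  return (int32_t)((uint32_t)acc + (uint32_t)((int32_t)a * (int32_t)b));
}

// VMLSL.S16: Qd = sub(Qd, vmull.s16(Dn, Dm)) per lane, modulo 2^32.
int32_t vmlsl_s16_lane(int32_t acc, int16_t a, int16_t b) {
  return (int32_t)((uint32_t)acc - (uint32_t)((int32_t)a * (int32_t)b));
}

// VABAL.U8: Qd = add(Qd, zext(vabd(Dn, Dm))) per lane; the absolute
// difference is non-negative, so zext is correct for both signednesses.
uint16_t vabal_u8_lane(uint16_t acc, uint8_t a, uint8_t b) {
  return acc + (uint16_t)(a > b ? a - b : b - a);
}
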
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, + IntOp, ExtOp, OpNode>; + def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, + IntOp, ExtOp, OpNode>; + def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, + IntOp, ExtOp, OpNode>; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", add, sext, 1>; +defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", add, zext, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", int_arm_neon_vmullu, 1>; +defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; +defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu>; +defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; +defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, @@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", NEONvmulls, add>; +defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", NEONvmullu, add>; -defm VMLALsls : 
N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; +defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, - "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", NEONvmullu, sub>; -defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", sub, sext, 0>; +defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", sub, zext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; -defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; +defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", int_arm_neon_vabds, 0>; + "vabd", "s", int_arm_neon_vabds, 1>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", int_arm_neon_vabdu, 0>; + "vabd", "u", int_arm_neon_vabdu, 1>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, - "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, - "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "s", 
int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "s", int_arm_neon_vabds, zext, 1>; +defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabds, add>; +defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabdu, add>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, - "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, + "vabal", "s", int_arm_neon_vabds, zext, add>; +defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, + "vabal", "u", int_arm_neon_vabdu, zext, add>; // Vector Maximum and Minimum. @@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, - "vmovn", "i", int_arm_neon_vmovn>; +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, + "vmovn", "i", trunc>; // VQMOVN : Vector Saturating Narrowing Move defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn", "s", int_arm_neon_vqmovns>; @@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun", "s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s", - int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u", - int_arm_neon_vmovlu>; +defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; +defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; // Vector Conversions. diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index bc0790dccbb5f..a13ff12327491 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 // ADD rd, sp, #imm8 +// This is rematerializable, which is particularly useful for taking the +// address of locals. +let isReMaterializable = 1 in { def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, "add\t$dst, $sp, $rhs", []>, T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8 +} // ADD sp, sp, #imm7 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, @@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, let Inst{2-0} = 0b101; } -// Pseudo instruction that will expand into a tSUBspi + a copy. -let usesCustomInserter = 1 in { // Expanded after instruction selection. 
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "${:comment} sub\t$dst, $rhs", []>; - -def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "${:comment} add\t$dst, $rhs", []>; - -let Defs = [CPSR] in -def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "${:comment} and\t$dst, $rhs", []>; -} // usesCustomInserter - //===----------------------------------------------------------------------===// // Control Flow Instructions. // @@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in { def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", + IIC_Br, "mov\tpc, $target\n\t.align\t2$jt", [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, Encoding16 { let Inst{15-7} = 0b010001101; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index bbe675e81ab1d..6ba0a44be4700 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm ComplexPattern<i32, 2, "SelectT2ShifterOperandReg", [shl,srl,sra,rotr]> { let PrintMethod = "printT2SOOperand"; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops rGPR, i32imm); } // t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value @@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // represented in the imm field in the same 12-bit form that they are encoded // into t2_so_imm instructions: the 8-bit immediate is the least significant // bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. -def t2_so_imm : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; -}]>; +def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>; // t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. 
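
The rewritten t2_so_imm PatLeaf defers to Pred_t2_so_imm, which wraps the same ARM_AM::getT2SOImmVal test that the removed inline predicate called. For reference, a standalone paraphrase of the Thumb-2 modified-immediate rule that test implements (a sketch, not the LLVM routine):

#include <cstdint>

bool isThumb2ModifiedImm(uint32_t V) {
  if (V <= 0xFF) return true;                     // plain 8-bit value
  uint32_t B = V & 0xFF;
  if (V == ((B << 16) | B)) return true;          // 0x00XY00XY splat
  if (V == ((B << 24) | (B << 8))) return true;   // 0xXY00XY00 splat
  if (V == B * 0x01010101u) return true;          // 0xXYXYXYXY splat
  for (unsigned Rot = 8; Rot < 32; ++Rot) {       // ROR of 0b1bcdefgh
    uint32_t R = (V << Rot) | (V >> (32 - Rot));
    if (R <= 0xFF && (R & 0x80)) return true;
  }
  return false;
}
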
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> { def t2addrmode_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); + let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); } @@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>, multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { // shifted imm - def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, + def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_imm:$src))]> { + [(set rGPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; let Inst{31-27} = 0b11110; @@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set rGPR:$dst, (opnode rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide =""> {
+ bit Commutable = 0, string wide = ""> {
 // shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
 opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
 let Inst{31-27} = 0b11110;
 let Inst{25} = 0;
 let Inst{24-21} = opcod;
@@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
 let Inst{15} = 0;
 }
 // register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
 opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
 let isCommutable = Commutable;
 let Inst{31-27} = 0b11101;
 let Inst{26-25} = 0b01;
@@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
 let Inst{5-4} = 0b00; // type
 }
 // shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
 opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
 let Inst{31-27} = 0b11101;
 let Inst{26-25} = 0b01;
 let Inst{24-21} = opcod;
@@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
 T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
 
-/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_irs counterpart.
-multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
+/// T2I_rbin_irs - Same as T2I_bin_irs except the order of operands is
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
 // shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
 opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
 let Inst{31-27} = 0b11110;
 let Inst{25} = 0;
 let Inst{24-21} = opcod;
 let Inst{20} = ?; // The S bit.
 let Inst{15} = 0;
 }
+ // register
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
+ opc, "\t$dst, $rhs, $lhs",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -289,9 +298,9 @@ let Defs = [CPSR] in { multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; @@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{20} = 0; // The S bit. 
let Inst{15} = 0; } + } // 12-bit imm - def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, + def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, !strconcat(opc, "w"), "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24} = 0; @@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; @@ -382,9 +395,9 @@ let Uses = [CPSR] in { multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -423,9 +436,9 @@ let Defs = [CPSR] in { multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // 
register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
 opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
 Requires<[IsThumb2]> {
 let isCommutable = Commutable;
 let Inst{31-27} = 0b11101;
@@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 let Inst{5-4} = 0b00; // type
 }
 // shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
 opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
 Requires<[IsThumb2]> {
 let Inst{31-27} = 0b11101;
 let Inst{26-25} = 0b01;
@@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 }
 }
 
-/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except it sets the 's' bit; the
+/// register version is not needed since this is only for codegen.
 let Defs = [CPSR] in {
 multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
 // shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
 !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
 let Inst{31-27} = 0b11110;
 let Inst{25} = 0;
 let Inst{24-21} = opcod;
@@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
 let Inst{15} = 0;
 }
 // shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
 !strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
 let Inst{31-27} = 0b11101;
 let Inst{26-25} = 0b01;
 let Inst{24-21} = opcod;
@@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
 // rotate operation that produces a value.
 multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
 // 5-bit imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
 opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
 let Inst{31-27} = 0b11101;
 let Inst{26-21} = 0b010010;
 let Inst{19-16} = 0b1111; // Rn
 let Inst{5-4} = opcod;
 }
 // register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
 opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0100;
 let Inst{22-21} = opcod;
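
Both rbin multiclasses keep the DAG operands swapped relative to the assembly operand order: the immediate or shifted operand is the left-hand side of the subtraction. A one-line scalar reminder of what RSB computes (illustrative, not compiler code):

#include <cstdint>

// rsb rd, rn, #imm computes imm - rn (operands reversed versus sub),
// which is why the PatFrag operands appear swapped in the patterns.
uint32_t rsb_ri(uint32_t rn, uint32_t imm) { return imm - rn; }
// The common negation idiom: rsb rd, rn, #0 yields 0 - rn.
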
@@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
 /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
 /// patterns. Similar to T2I_bin_irs except the instruction does not produce
 /// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
 multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
 // shifted imm
 def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
@@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
 let Inst{11-8} = 0b1111; // Rd
 }
 // register
- def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
 opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, GPR:$rhs)]> {
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
 let Inst{31-27} = 0b11101;
 let Inst{26-25} = 0b01;
 let Inst{24-21} = opcod;
@@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
 /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
 multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
 opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0100;
 let Inst{22-20} = opcod;
@@ -650,9 +664,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
 let Inst{7} = 1;
 let Inst{5-4} = 0b00; // rotate
 }
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
 opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0100;
 let Inst{22-20} = opcod;
@@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
 
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
 multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
 opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+ [(set rGPR:$dst, (opnode rGPR:$src))]>,
 Requires<[HasT2ExtractPack]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0100;
@@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
 let Inst{7} = 1;
 let Inst{5-4} = 0b00; // rotate
 }
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
 opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
 Requires<[HasT2ExtractPack]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0100;
@@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
 
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
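
The uxtb16 patterns above combine an optional rotate with an 0x00FF00FF mask, and the binary rrot multiclasses further down do the analogous add of a zero-extended byte; the sxtb16 variant defined next is disassembly-only with no pattern yet. A scalar sketch of the two patterned shapes (helper names are illustrative):

#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned R) {
  return R ? (V >> R) | (V << (32 - R)) : V;
}

// UXTB16{, ror #8/16/24}: the (and (rotr Src, rot), 0x00FF00FF) pattern;
// keeps bytes 0 and 2 of the rotated source.
uint32_t uxtb16(uint32_t Src, unsigned Rot) {
  return rotr32(Src, Rot) & 0x00FF00FFu;
}

// UXTAB: the (add LHS, (and (rotr RHS, rot), 0x00FF)) shape from
// T2I_bin_rrot; adds the zero-extended low byte of the rotated RHS.
uint32_t uxtab(uint32_t LHS, uint32_t RHS, unsigned Rot) {
  return LHS + (rotr32(RHS, Rot) & 0xFFu);
}
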
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", - [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]>, + [(set rGPR:$dst, (opnode rGPR:$LHS, + (rotr rGPR:$RHS, rot_imm:$rot)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { // DO variant - disassembly only, no pattern multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; @@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // assembler. 
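
The recurring GPR to rGPR substitution in these definitions, including the t2LEApcrel forms that follow, restricts register operands away from SP and PC, since most 32-bit Thumb-2 data-processing encodings are UNPREDICTABLE with r13 or r15 in those slots. Informally, assuming the usual ARM register numbering:

// rGPR, roughly: any general-purpose register except SP (r13) and PC (r15).
bool isRestrictedGPR(unsigned RegNo) { return RegNo != 13 && RegNo != 15; }
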
let neverHasSideEffects = 1 in { let isReMaterializable = 1 in -def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #$label", []> { +def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, + "adr${p}.w\t$dst, #$label", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, let Inst{15} = 0; } } // neverHasSideEffects -def t2LEApcrelJT : T2XI<(outs GPR:$dst), +def t2LEApcrelJT : T2XI<(outs rGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, - "adr$p.w\t$dst, #${label}_${id}", []> { + "adr${p}.w\t$dst, #${label}_${id}", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), } // Signed and unsigned division on v7-M -def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, +def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, "sdiv", "\t$dst, $a, $b", - [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>, + [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>, Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; @@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, +def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, "udiv", "\t$dst, $a, $b", - [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>, + [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>, Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; @@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -// Pseudo instruction that will expand into a t2SUBrSPi + a copy. -let usesCustomInserter = 1 in { // Expanded after instruction selection. -def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; -def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; -def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; -} // usesCustomInserter - - //===----------------------------------------------------------------------===// // Load / store Instructions. // @@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword -def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), +def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins t2addrmode_imm8s4:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; -def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), +def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), (ins i32imm:$addr), IIC_iLoadi, "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn @@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr), def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), (t2LDRHpci tconstpool:$addr)>; +// FIXME: The destination register of the loads and stores can't be PC, but +// can be SP. We need another regclass (similar to rGPR) to represent +// that. 
Not a pressing issue since these are selected manually, +// not via pattern. + // Indexed loads let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), @@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in -def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, +def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, "mov", ".w\t$dst, $src", - [(set GPR:$dst, t2_so_imm:$src)]> { + [(set rGPR:$dst, t2_so_imm:$src)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; @@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, +def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, $src", - [(set GPR:$dst, imm0_65535:$src)]> { + [(set rGPR:$dst, imm0_65535:$src)]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; @@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, } let Constraints = "$src = $dst" in -def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, +def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi, "movt", "\t$dst, $imm", - [(set GPR:$dst, - (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> { + [(set rGPR:$dst, + (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; @@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, let Inst{15} = 0; } -def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>; +def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; //===----------------------------------------------------------------------===// // Extend Instructions. @@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth", defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; -def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. 
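
A scalar demonstration of why that first pattern is commented out: uxtb16 with ror #24 also rotates the top byte of the source down into bits 7-0, which (shl x, 8) cannot produce, so the two sides disagree whenever byte 3 of the source is nonzero:

#include <cstdint>

uint32_t uxtb16_ror24(uint32_t X) {   // what the instruction computes
  uint32_t R = (X >> 24) | (X << 8);  // rotr(X, 24)
  return R & 0x00FF00FFu;
}
uint32_t shl8_mask(uint32_t X) {      // what the disabled pattern matched
  return (X << 8) & 0x00FF00FFu;
}
// uxtb16_ror24(0xAA000000) == 0x000000AA, but shl8_mask(0xAA000000) == 0.
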
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), +// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; +def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), + (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB -defm t2RSB : T2I_rbin_is <0b1110, "rsb", +defm t2RSB : T2I_rbin_irs <0b1110, "rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; @@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; let AddedComplexity = 1 in -def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm), - (t2SUBSri GPR:$src, imm0_255_neg:$imm)>; -def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm), - (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm), + (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), + (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. let AddedComplexity = 1 in -def : T2Pat<(adde GPR:$src, imm0_255_not:$imm), - (t2SBCSri GPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm), - (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>; +def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), + (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), + (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only -class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> - : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc, - "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> { +class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, + list<dag> pat = [/* For disassembly only; pattern left blank */]> + : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc, + "\t$dst, $a, $b", pat> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; @@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> // Saturating add/subtract -- for disassembly only -def t2QADD : T2I_pam<0b000, 0b1000, "qadd">; +def t2QADD : T2I_pam<0b000, 0b1000, "qadd", + [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>; def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">; +def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", + [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>; def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; @@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; // Unsigned Sum of Absolute 
Differences [and Accumulate] -- for disassembly only -def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), +def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b), NoItinerary, "usad8", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8", +def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8", "\t$dst, $a, $b, $acc", []>; // Signed/Unsigned saturate -- for disassembly only -def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1100; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 0; // sh = '0' -} - -def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' } -def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, +def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } -def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{25-22} = 0b1110; - let Inst{20} = 0; - let Inst{15} = 0; - let Inst{21} = 0; // sh = '0' -} - -def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), - NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", - [/* For disassembly only; pattern left blank */]> { +def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), + NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", + [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; let Inst{20} = 0; let Inst{15} = 0; - let Inst{21} = 1; // sh = '1' } -def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, +def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, "usat16", "\t$dst, $bit_pos, $a", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } +def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; + //===----------------------------------------------------------------------===// // Shift and rotate Instructions. 
// @@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; let Uses = [CPSR] in { -def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "rrx", "\t$dst, $src", - [(set GPR:$dst, (ARMrrx GPR:$src))]> { + [(set rGPR:$dst, (ARMrrx rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, } let Defs = [CPSR] in { -def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "lsrs", ".w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { + [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, +def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, "asrs", ".w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { + [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor", defm t2BIC : T2I_bin_w_irs<0b0001, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm t2ANDS : T2I_bin_s_irs<0b0000, "and", + BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; + let Constraints = "$src = $dst" in -def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), +def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$dst, $imm", - [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> { + [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10110; @@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{15} = 0; } -def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), +def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), let Inst{15} = 0; } -def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), +def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), } // A8.6.18 BFI - Bitfield insert (Encoding T1) -// Added for disassembler with the pattern field purposely left blank. -// FIXME: Utilize this instruction in codgen. 
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs rGPR:$dst),
+ (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
+ bf_inv_mask_imm:$imm))]> {
 let Inst{31-27} = 0b11110;
 let Inst{25} = 1;
 let Inst{24-20} = 0b10110;
@@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
 }
 
 defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>>;
+ (not node:$RHS))>, 0, "">;
 
 // Prefer over t2EORri ra, rb, -1 because mvn has a 16-bit version
 let AddedComplexity = 1 in
 defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
 
-def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
- (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
 
 // FIXME: Disable this pattern on Darwin to work around an assembler bug.
-def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
- (t2ORNri GPR:$src, t2_so_imm_not:$imm)>,
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
 Requires<[IsThumb2]>;
 
 def : T2Pat<(t2_so_imm_not:$src),
@@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src),
 // Multiply Instructions.
 //
 let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
 "mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0110;
 let Inst{22-20} = 0b000;
@@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
 let Inst{7-4} = 0b0000; // Multiply
 }
 
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
 "mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0110;
 let Inst{22-20} = 0b000;
@@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
 let Inst{7-4} = 0b0000; // Multiply
 }
 
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
 "mls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0110;
 let Inst{22-20} = 0b000;
@@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
 
 // Extra precision multiplies with low / high results
 let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
 "smull", "\t$ldst, $hdst, $a, $b", []> {
 let Inst{31-27} = 0b11111;
 let Inst{26-23} = 0b0111;
@@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
 let Inst{7-4} = 0b0000;
 }
 
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2UMULL : T2I<(outs rGPR:$ldst, 
rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMUL64, "umull", "\t$ldst, $hdst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, } // isCommutable // Multiply + accumulate -def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "smlal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, let Inst{7-4} = 0b0000; } -def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "umlal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, let Inst{7-4} = 0b0000; } -def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, +def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), + (ins rGPR:$a, rGPR:$b), IIC_iMAC64, "umaal", "\t$ldst, $hdst, $a, $b", []>{ let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0111; @@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, // Rounding variants of the below included for disassembly only // Most significant word multiply -def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", - [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> { + [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, +def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, "smmulr", "\t$dst, $a, $b", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", - [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { + [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", - [(set 
GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> { + [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, +def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, } multiclass T2I_smul<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16)))]> { + [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), + (sext_inreg rGPR:$b, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16))))]> { + [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), + (sra rGPR:$b, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16)))]> { + [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), + (sext_inreg rGPR:$b, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b", - [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16))))]> { + [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), + (sra rGPR:$b, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b", - [(set GPR:$dst, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16)))]> { + [(set rGPR:$dst, (sra (opnode rGPR:$a, + (sext_inreg rGPR:$b, i16)), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b", - [(set 
GPR:$dst, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16)))]> { + [(set rGPR:$dst, (sra (opnode rGPR:$a, + (sra rGPR:$b, (i32 16))), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, - (opnode (sext_inreg GPR:$a, i16), - (sext_inreg GPR:$b, i16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, + (opnode (sext_inreg rGPR:$a, i16), + (sext_inreg rGPR:$b, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16), + (sra rGPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sext_inreg GPR:$b, i16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), + (sext_inreg rGPR:$b, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), + (sra rGPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, + (sext_inreg rGPR:$b, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), 
(i32 16))))]> { + [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, + (sra rGPR:$b, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only -def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", +def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", +def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", +def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", +def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", [/* For disassembly only; pattern left blank */]>; // Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD // These are for disassembly only.
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { +def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { +def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { +def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { +def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), + IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { let Inst{15-12} = 0b1111; } -def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad", +def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad", "\t$dst, $a, $b, $acc", []>; -def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx", +def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx", "\t$dst, $a, $b, $acc", []>; -def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd", +def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd", "\t$dst, $a, $b, $acc", []>; -def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), - (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx", +def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), + (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx", "\t$dst, $a, $b, $acc", []>; -def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald", +def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx", +def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld", +def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld", "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst), - (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx", +def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), + (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx", "\t$ldst, $hdst, $a, $b", []>; //===----------------------------------------------------------------------===// @@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, let Inst{5-4} = op2; } -def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "clz", "\t$dst, $src", [(set 
GPR:$dst, (ctlz GPR:$src))]>; +def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, + "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>; -def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "rbit", "\t$dst, $src", - [(set GPR:$dst, (ARMrbit GPR:$src))]>; + [(set rGPR:$dst, (ARMrbit rGPR:$src))]>; -def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>; +def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, + "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>; -def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "rev16", ".w\t$dst, $src", - [(set GPR:$dst, - (or (and (srl GPR:$src, (i32 8)), 0xFF), - (or (and (shl GPR:$src, (i32 8)), 0xFF00), - (or (and (srl GPR:$src, (i32 8)), 0xFF0000), - (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>; + [(set rGPR:$dst, + (or (and (srl rGPR:$src, (i32 8)), 0xFF), + (or (and (shl rGPR:$src, (i32 8)), 0xFF00), + (or (and (srl rGPR:$src, (i32 8)), 0xFF0000), + (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>; -def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, +def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, "revsh", ".w\t$dst, $src", - [(set GPR:$dst, + [(set rGPR:$dst, (sext_inreg - (or (srl (and GPR:$src, 0xFF00), (i32 8)), - (shl GPR:$src, (i32 8))), i16))]>; + (or (srl (and rGPR:$src, 0xFF00), (i32 8)), + (shl rGPR:$src, (i32 8))), i16))]>; -def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", - [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), - (and (shl GPR:$src2, (i32 imm:$shamt)), +def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", + [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF), + (and (shl rGPR:$src2, lsl_amt:$sh), 0xFFFF0000)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; @@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), } // Alternate cases for PKHBT where identities eliminate some nodes. -def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), - (t2PKHBT GPR:$src1, GPR:$src2, 0)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), + (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), + (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>, Requires<[HasT2ExtractPack]>; -def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", - [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), - (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]>, +// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and +// will match the pattern below. 
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", + [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000), + (and (sra rGPR:$src2, asr_amt:$sh), + 0xFFFF)))]>, Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (t2PKHTB GPR:$src1, GPR:$src2, 16)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), + (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>, Requires<[HasT2ExtractPack]>; -def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), - (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>, +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), + (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), + (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>, Requires<[HasT2ExtractPack]>; //===----------------------------------------------------------------------===// // Comparison Instructions... // - defm t2CMP : T2I_cmp_irs<0b1101, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", @@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst", defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; -// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero. -// Short range conditional branch. Looks awesome for loops. Need to figure -// out how to use this one. - - // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, +def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", - [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, + [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst"> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, let Inst{7-4} = 0b0000; } -def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true), +def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), IIC_iCMOVi, "mov", ".w\t$dst, $true", -[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, +[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst"> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; // Shift type. 
} -def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; -def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), - (ins GPR:$false, GPR:$true, i32imm:$rhs), +def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst), + (ins rGPR:$false, rGPR:$true, i32imm:$rhs), IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; } // neverHasSideEffects @@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2Int_MemBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dmb", "", - [(ARMMemBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "", + [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> { let Inst{31-4} = 0xF3BF8F5; // FIXME: add support for options other than a full system DMB let Inst{3-0} = 0b1111; } -def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), - ThumbFrm, NoItinerary, - "dsb", "", - [(ARMSyncBarrierV7)]>, - Requires<[IsThumb2]> { +def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "", + [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> { let Inst{31-4} = 0xF3BF8F4; // FIXME: add support for options other than a full system DSB let Inst{3-0} = 0b1111; @@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]", "", []>; -def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrex", "\t$dest, [$ptr]", "", []> { @@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 } -def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "ldrexd", "\t$dest, $dest2, [$ptr]", "", [], {?, ?, ?, ?}>; } let mayStore = 1, Constraints = "@earlyclobber $success" in { -def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexb", "\t$success, $src, [$ptr]", 
"", []>; -def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexh", "\t$success, $src, [$ptr]", "", []>; -def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strex", "\t$success, $src, [$ptr]", "", []> { @@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 } -def t2STREXD : T2I_strex<0b11, (outs GPR:$success), - (ins GPR:$src, GPR:$src2, GPR:$ptr), +def t2STREXD : T2I_strex<0b11, (outs rGPR:$success), + (ins rGPR:$src, rGPR:$src2, rGPR:$ptr), AddrModeNone, Size4Bytes, NoItinerary, "strexd", "\t$success, $src, $src2, [$ptr]", "", [], {?, ?, ?, ?}>; @@ -2416,7 +2414,7 @@ let Defs = D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ], hasSideEffects = 1, isBarrier = 1 in { - def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" "adds\t$val, #7\n\t" @@ -2425,14 +2423,14 @@ let Defs = "b\t1f\n\t" "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], hasSideEffects = 1, isBarrier = 1 in { - def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" "adds\t$val, #7\n\t" @@ -2441,7 +2439,7 @@ let Defs = "b\t1f\n\t" "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; } @@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : T2JTI<(outs), (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n$jt", + IIC_Br, "mov\tpc, $target$jt", [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0100100; @@ -2496,7 +2494,7 @@ def t2BR_JT : def t2TBB : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbb\t$index\n$jt", []> { + IIC_Br, "tbb\t$index$jt", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) @@ -2507,7 +2505,7 @@ def t2TBB : def t2TBH : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbh\t$index\n$jt", []> { + IIC_Br, "tbh\t$index$jt", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0001101; let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) @@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", +def t2BXJ : T2I<(outs), (ins 
rGPR:$func), NoItinerary, "bxj", "\t$func", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", } // Return From Exception is a system instruction -- for disassembly only -def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!", +def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000011; // W = 1 } -def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base", +def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000001; // W = 0 } -def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!", +def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0011011; // W = 1 } -def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", +def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0011001; // W = 0 @@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", // // Two piece so_imms. -def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS), - (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS), + (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), - (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS), + (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), - (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), +def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS), + (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS), - (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), +def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS), + (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), (t2_so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction to make it re-materializable. Remove // when we can do generalized remat.
let isReMaterializable = 1 in -def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, +def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", - [(set GPR:$dst, (i32 imm:$src))]>; + [(set rGPR:$dst, (i32 imm:$src))]>; // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, @@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), // // Rd = Instr{11-8} -def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", +def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", } // Rd = Instr{11-8} -def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", +def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", } // Rn = Inst{19-16} -def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", +def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", "\tcpsr$mask, $src", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; @@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", } // Rn = Inst{19-16} -def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr", +def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", "\tspsr$mask, $src", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 84c23e1a784cc..c29e09606bd48 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, +def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { let Inst{20} = 1; } -def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, +def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { + "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { let Inst{20} = 1; } -def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}!, $dsts", - "$addr.base = $wb", []> { + "vldm${addr:submode}${p}\t$addr!, $dsts", + "$addr.addr = $wb", []> { let Inst{20} = 1; } -def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - 
"vldm${addr:submode}${p}\t${addr:base}!, $dsts", - "$addr.base = $wb", []> { + "vldm${addr:submode}${p}\t$addr!, $dsts", + "$addr.addr = $wb", []> { let Inst{20} = 1; } } // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, +def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { let Inst{20} = 0; } -def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, +def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { + "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { let Inst{20} = 0; } -def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, $srcs", - "$addr.base = $wb", []> { + "vstm${addr:submode}${p}\t$addr!, $srcs", + "$addr.addr = $wb", []> { let Inst{20} = 0; } -def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, +def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, $srcs", - "$addr.base = $wb", []> { + "vstm${addr:submode}${p}\t$addr!, $srcs", + "$addr.addr = $wb", []> { let Inst{20} = 0; } } // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq @@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. // For disassembly only. 
- +let Uses = [FPSCR] in { def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> { let Inst{7} = 0; // Z bit } def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> { let Inst{7} = 0; // Z bit } def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", - [/* For disassembly only; pattern left blank */]> { + [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> { let Inst{7} = 0; // Z bit } +} // Convert between floating-point and fixed-point // Data type for fixed-point naming convention: @@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in { // FP to Fixed-Point: +let isCodeGenOnly = 1 in { def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", @@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; +} // Fixed-Point to FP: +let isCodeGenOnly = 1 in { def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", @@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; +} } // End of 'let Constraints = "$src = $dst" in' @@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", } // FPSCR <-> GPR (for disassembly only) - -let neverHasSideEffects = 1 in { -let Uses = [FPSCR] in { -def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", - "\t$dst, fpscr", - [/* For disassembly only; pattern left blank */]> { +let hasSideEffects = 1, Uses = [FPSCR] in +def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, + "vmrs", "\t$dst, fpscr", + [(set GPR:$dst, (int_arm_get_fpscr))]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -let Defs = [FPSCR] in { -def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", - "\tfpscr, $src", - [/* For disassembly only; pattern left blank */]> { +let Defs = [FPSCR] in +def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, + "vmsr", "\tfpscr, $src", + [(int_arm_set_fpscr GPR:$src)]> { let Inst{27-20} = 0b11101110; let Inst{19-16} = 0b0001; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } -} -} // neverHasSideEffects // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f80e316d23e85..2b7645a42119e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); namespace { struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; - ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} + ARMLoadStoreOpt() : MachineFunctionPass(ID) {} const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); - if (isAM4 && Offset == 4) { - if (isThumb2) - // Thumb2 does not support ldmib / stmib. - return false; + // VFP and Thumb2 do not support IB or DA modes. + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); + bool haveIBAndDA = isNotVFP && !isThumb2; + if (Offset == 4 && haveIBAndDA) Mode = ARM_AM::ib; - } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) { - if (isThumb2) - // Thumb2 does not support ldmda / stmda. - return false; + else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) Mode = ARM_AM::da; - } else if (isAM4 && Offset == -4 * (int)NumRegs) { + else if (Offset == -4 * (int)NumRegs && isNotVFP) + // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - } else if (Offset != 0) { + else if (Offset != 0) { // If starting offset isn't zero, insert an MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, BaseKill = true; // New base is always killed right after its use. } - bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD); bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); Opcode = getLoadStoreMultipleOpcode(Opcode); - MachineInstrBuilder MIB = (isAM4) - ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) - : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, isDPR ?
NumRegs<<1 : NumRegs)) - .addImm(Pred).addReg(PredReg); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, if (KilledRegs.count(Reg)) { unsigned j = Killer[Reg]; memOps[j].MBBI->getOperand(0).setIsKill(false); + memOps[j].isKill = false; } } MBB.erase(memOps[i].MBBI); @@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVector<MachineBasicBlock::iterator, 4> &Merges) { - bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; unsigned insertAfter = SIndex; @@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX : ARMRegisterInfo::getRegisterNumbering(Reg); - // AM4 - register numbers in ascending order. - // AM5 - consecutive register numbers in ascending order. - // Can only do up to 16 double-word registers per insn. + // Register numbers must be in ascending order. For VFP, the registers + // must also be consecutive and there is a limit of 16 double-word + // registers per instruction. if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isAM4 && RegNum > PRegNum) + ((isNotVFP && RegNum > PRegNum) || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; @@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, return false; // Make sure the offset fits in 8 bits. - if (Bytes <= 0 || (Limit && Bytes >= Limit)) + if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME @@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, MI->getOpcode() != ARM::ADDri) return false; - if (Bytes <= 0 || (Limit && Bytes >= Limit)) + if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; @@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STM: case ARM::t2LDM: case ARM::t2STM: - return (MI->getNumOperands() - 4) * 4; case ARM::VLDMS: case ARM::VSTMS: + return (MI->getNumOperands() - 4) * 4; case ARM::VLDMD: case ARM::VSTMD: - return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; + return (MI->getNumOperands() - 4) * 8; } } @@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || - Opcode == ARM::STM || Opcode == ARM::t2STM); bool DoMerge = false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - unsigned Offset = 0; - if (isAM4) { - // Can't use an updating ld/st if the base register is also a dest - // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).getReg() == Base) - return false; - } - Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); - } else { - // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); - Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); + // Can't use an updating ld/st if the base register is also a dest + // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. + for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).getReg() == Base) + return false; } + Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator PrevMBBI = prior(MBBI); while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI; - if (isAM4) { - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - Mode = ARM_AM::db; - } else if (isAM4 && Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - Mode = ARM_AM::da; - } - } else { - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::db; + DoMerge = true; + } else if (Mode == ARM_AM::ib && + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { + Mode = ARM_AM::da; + DoMerge = true; } if (DoMerge) MBB.erase(PrevMBBI); @@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) ++NextMBBI; - if (isAM4) { - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - } else { - if (Mode == ARM_AM::ia && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } + if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && + isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; + } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && + isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { + DoMerge = true; } if (DoMerge) { if (NextMBBI == I) { @@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register - .addReg(Base, getKillRegState(BaseKill)); - if (isAM4) { - // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) - .addImm(Pred).addReg(PredReg); - } else { - // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) - .addImm(Pred).addReg(PredReg); - } + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)) + .addImm(Pred).addReg(PredReg); // Transfer the rest of operands. 
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); @@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; unsigned Offset = 0; if (isAM5) - Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - (isDPR ? 2 : 1)); + Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ? + ARM_AM::db : ARM_AM::ia); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -748,6 +708,9 @@ if (isAM5) { // VLDM[SD]_UPD, VSTM[SD]_UPD + // (There are no base-updating versions of VLDR/VSTR instructions, but the + // updating load/store-multiple instructions can be used with only one + // register.) MachineOperand &MO = MI->getOperand(0); BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register @@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { namespace { struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} const TargetData *TD; const TargetInstrInfo *TII; diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index ab2b06b60783b..ab2b06b60783b 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h index b81a30690ce24..b81a30690ce24 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h +++ b/lib/Target/ARM/ARMMCInstLower.h diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 7e57a1ca55762..514c26b4daf03 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// processFunctionBeforeCalleeSavedScan(). bool HasStackFrame; + /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by + /// emitPrologue. + bool RestoreSPFromFP; + /// LRSpilledForFarJump - True if the LR register has been spilled to /// enable a far jump.
bool LRSpilledForFarJump; @@ -95,7 +99,7 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - VarArgsRegSaveSize(0), HasStackFrame(false), + VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -106,7 +110,7 @@ public: explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - VarArgsRegSaveSize(0), HasStackFrame(false), + VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), @@ -125,6 +129,9 @@ public: bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } + bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; } + void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; } + bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index d020f3c74bdea..305b232e6a99a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===// +//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // FP is R11, R9 is available. - static const unsigned ARM_GPR_AO_1[] = { + static const unsigned ARM_GPR_AO[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12,ARM::LR, ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R9, ARM::R10, - ARM::R11 }; - // FP is R11, R9 is not available. - static const unsigned ARM_GPR_AO_2[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, ARM::R7, - ARM::R8, ARM::R10, - ARM::R11 }; - // FP is R7, R9 is available as non-callee-saved register. - // This is used by Darwin. - static const unsigned ARM_GPR_AO_3[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R9, ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R10,ARM::R11,ARM::R7 }; - // FP is R7, R9 is not available. - static const unsigned ARM_GPR_AO_4[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R10,ARM::R11, - ARM::R7 }; - // FP is R7, R9 is available as callee-saved register. - // This is used by non-Darwin platform in Thumb mode. - static const unsigned ARM_GPR_AO_5[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, - ARM::R12,ARM::LR, - ARM::R4, ARM::R5, ARM::R6, - ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 }; + ARM::R8, ARM::R9, ARM::R10, ARM::R11 }; // For Thumb1 mode, we don't want to allocate hi regs at all, as we // don't know how to spill them. 
If we make our prologue/epilogue code @@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); if (Subtarget.isThumb1Only()) return THUMB_GPR_AO; - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - return ARM_GPR_AO_4; - else - return ARM_GPR_AO_3; - } else { - if (Subtarget.isR9Reserved()) - return ARM_GPR_AO_2; - else if (Subtarget.isThumb()) - return ARM_GPR_AO_5; - else - return ARM_GPR_AO_1; - } + return ARM_GPR_AO; } GPRClass::iterator GPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - GPRClass::iterator I; - - if (Subtarget.isThumb1Only()) { - I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; - } - - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); - else - I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned)); - } else { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); - else if (Subtarget.isThumb()) - I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned)); - else - I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); - } - - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); + return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned)); } }]; } -// Thumb registers are R0-R7 normally. Some instructions can still use -// the general GPR register class above (MOV, e.g.) -def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { +// Restricted GPR register class. Many Thumb2 instructions allow the full +// register range for operands, but have undefined behaviours when PC +// or SP (R15 or R13) are used. The ARM ARM refers to these operands +// via the BadReg() pseudo-code description. +def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R11, R12, LR]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - static const unsigned THUMB_tGPR_AO[] = { + static const unsigned ARM_rGPR_AO[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, + ARM::R11 }; + + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_rGPR_AO[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; - // FP is R7, only low registers available.
- tGPRClass::iterator - tGPRClass::allocation_order_begin(const MachineFunction &MF) const { - return THUMB_tGPR_AO; + rGPRClass::iterator + rGPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.isThumb1Only()) + return THUMB_rGPR_AO; + return ARM_rGPR_AO; } - tGPRClass::iterator - tGPRClass::allocation_order_end(const MachineFunction &MF) const { + rGPRClass::iterator + rGPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - tGPRClass::iterator I = - THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned)); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + + if (Subtarget.isThumb1Only()) + return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned)); + return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned)); } }]; } +// Thumb registers are R0-R7 normally. Some instructions can still use +// the general GPR register class above (MOV, e.g.) +def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {} + // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. // Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of @@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); if (Subtarget.isThumb1Only()) return THUMB_GPR_AO_TC; - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - return ARM_GPR_NOR9_TC; - else - return ARM_GPR_R9_TC; - } else - // R9 is either callee-saved or reserved; can't use it. - return ARM_GPR_NOR9_TC; + return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC; } tcGPRClass::iterator tcGPRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); - GPRClass::iterator I; - - if (Subtarget.isThumb1Only()) { - I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); - return I; - } - - if (Subtarget.isTargetDarwin()) { - if (Subtarget.isR9Reserved()) - I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - else - I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)); - } else - // R9 is either callee-saved or reserved; can't use it. - I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); - return I; + + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); + + return Subtarget.isTargetDarwin() ? 
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) : + ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); } }]; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 10fd257055fb5..cb539f4c01ec8 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowVMLx(false) + , SlowFPBrcc(false) , IsThumb(isT) , ThumbMode(Thumb1) + , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) , UseMovt(UseMOVT) , HasFP16(false) , HasHardwareDivide(false) , HasT2ExtractPack(false) + , HasDataBarrier(false) + , Pref32BitThumb(false) + , FPOnlySP(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e7d92ede9b984..67e58038ee779 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M + V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M }; enum ARMFPEnum { @@ -63,6 +63,9 @@ protected: /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; + /// NoARM - True if subtarget does not support ARM mode execution. + bool NoARM; + /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; @@ -84,6 +87,18 @@ protected: /// instructions. bool HasT2ExtractPack; + /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier + /// instructions. + bool HasDataBarrier; + + /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions + /// over 16-bit ones. + bool Pref32BitThumb; + + /// FPOnlySP - If true, the floating point unit only supports single + /// precision. + bool FPOnlySP; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -128,6 +143,8 @@ protected: bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } + bool hasARMOps() const { return !NoARM; } + bool hasVFP2() const { return ARMFPUType >= VFPv2; } bool hasVFP3() const { return ARMFPUType >= VFPv3; } bool hasNEON() const { return ARMFPUType >= NEON; } @@ -135,8 +152,11 @@ protected: return hasNEON() && UseNEONForSinglePrecisionFP; } bool hasDivide() const { return HasHardwareDivide; } bool hasT2ExtractPack() const { return HasT2ExtractPack; } + bool hasDataBarrier() const { return HasDataBarrier; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool isFPBrccSlow() const { return SlowFPBrcc; } + bool isFPOnlySP() const { return FPOnlySP; } + bool prefers32BitThumb() const { return Pref32BitThumb; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 09203f9304df0..30ff8276cdaac 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } } - extern "C" void LLVMInitializeARMTarget() { // Register the target. 
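// The two lines here are the standard target-registration idiom:
// LLVMInitialize<Name>Target is the entry point the target registry calls,
// and RegisterTargetMachine binds a Target descriptor to a TargetMachine
// subclass. A minimal sketch for a hypothetical out-of-tree target "Foo"
// (names illustrative, not from this commit):
//
//   extern "C" void LLVMInitializeFooTarget() {
//     RegisterTargetMachine<FooTargetMachine> X(TheFooTarget);
//   }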
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, "v128:64:128-v64:64:64-n32")), TLInfo(*this), TSInfo(*this) { + if (!Subtarget.hasARMOps()) + report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " + "support ARM mode execution!"); } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, TSInfo(*this) { } +// Pass Pipeline Configuration +bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (OptLevel != CodeGenOpt::None) + PM.add(createARMGlobalMergePass(getTargetLowering())); + return false; +} -// Pass Pipeline Configuration bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createARMISelDag(*this, OptLevel)); @@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (Subtarget.isThumb2()) + if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) PM.add(createThumb2SizeReductionPass()); PM.add(createARMConstantIslandPass()); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a222e57b13ff3..17e5425a9d370 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -50,6 +50,7 @@ public: } // Pass Pipeline Configuration + virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4b083244b2413..75e2a739bf1f8 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMSubtarget.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -18,8 +19,10 @@ #include "llvm/Target/TargetAsmParser.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" using namespace llvm; @@ -37,6 +40,7 @@ enum ShiftType { class ARMAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; private: MCAsmParser &getParser() const { return Parser; } @@ -76,26 +80,33 @@ private: bool ParseDirectiveSyntax(SMLoc L); - // TODO - For now hacked versions of the next two are in here in this file to - // allow some parser testing until the table gen versions are implemented. + bool MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCInst &Inst) { + if (!MatchInstructionImpl(Operands, Inst)) + return false; + + // FIXME: We should give nicer diagnostics about the exact failure. 
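// MatchInstructionImpl is the tblgen-generated matcher (emitted into
// ARMGenAsmMatcher.inc, which this file now includes at the bottom); like
// this wrapper it returns false on a successful match. A hedged sketch of
// the call, using the generated signature declared further down:
//
//   MCInst Inst;
//   if (!MatchInstructionImpl(Operands, Inst)) {
//     // Matched: Inst now carries the opcode and operands.
//   }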
+ Error(IDLoc, "unrecognized instruction"); + + return true; + } /// @name Auto-generated Match Functions /// { - bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); - /// MatchRegisterName - Match the given string to a register name and return - /// its register number, or -1 if there is no match. To allow return values - /// to be used directly in register lists, arm registers have values between - /// 0 and 15. - int MatchRegisterName(StringRef Name); + unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const; + + bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*> + &Operands, + MCInst &Inst); /// } public: - ARMAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} + ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) + : TargetAsmParser(T), Parser(_Parser), TM(_TM) {} virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -110,16 +121,21 @@ private: ARMOperand() {} public: enum KindTy { - Token, - Register, + CondCode, Immediate, - Memory + Memory, + Register, + Token } Kind; SMLoc StartLoc, EndLoc; union { struct { + ARMCC::CondCodes Val; + } CC; + + struct { const char *Data; unsigned Length; } Tok; @@ -151,16 +167,19 @@ public: }; - ARMOperand(KindTy K, SMLoc S, SMLoc E) - : Kind(K), StartLoc(S), EndLoc(E) {} + //ARMOperand(KindTy K, SMLoc S, SMLoc E) + // : Kind(K), StartLoc(S), EndLoc(E) {} ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { + case CondCode: + CC = o.CC; + break; case Token: - Tok = o.Tok; + Tok = o.Tok; break; case Register: Reg = o.Reg; @@ -179,6 +198,11 @@ public: /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + ARMCC::CondCodes getCondCode() const { + assert(Kind == CondCode && "Invalid access!"); + return CC.Val; + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -194,15 +218,50 @@ public: return Imm.Val; } - bool isToken() const {return Kind == Token; } + bool isCondCode() const { return Kind == CondCode; } + + bool isImm() const { return Kind == Immediate; } bool isReg() const { return Kind == Register; } + bool isToken() const {return Kind == Token; } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); + // FIXME: What belongs here? 
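// The reg0 operand below appears to be the second half of a predicate
// operand pair: a condition-code immediate followed by a flags-register
// operand, with register 0 meaning "CPSR not read". A hedged sketch of the
// flag-reading form (assuming ARM::CPSR names the status register, as
// elsewhere in the tree):
//
//   Inst.addOperand(MCOperand::CreateImm(unsigned(ARMCC::NE))); // cond code
//   Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));           // reads flags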
+ Inst.addOperand(MCOperand::CreateReg(0)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + virtual void dump(raw_ostream &OS) const; + + static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC, + SMLoc S) { + Op.reset(new ARMOperand); + Op->Kind = CondCode; + Op->CC.Val = CC; + Op->StartLoc = S; + Op->EndLoc = S; + } + static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str, SMLoc S) { Op.reset(new ARMOperand); @@ -262,6 +321,33 @@ public: } // end anonymous namespace. +void ARMOperand::dump(raw_ostream &OS) const { + switch (Kind) { + case CondCode: + OS << ARMCondCodeToString(getCondCode()); + break; + case Immediate: + getImm()->print(OS); + break; + case Memory: + OS << "<memory>"; + break; + case Register: + OS << "<register " << getReg() << ">"; + break; + case Token: + OS << "'" << getToken() << "'"; + break; + } +} + +/// @name Auto-generated Match Functions +/// { + +static unsigned MatchRegisterName(StringRef Name); + +/// } + /// Try to parse a register name. The token must be an Identifier when called, /// and if it is a register name a Reg operand is created, the token is eaten /// and false is returned. Else true is returned and no token is eaten. @@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St, return false; } -/// A hack to allow some testing, to be replaced by a real table gen version. -int ARMAsmParser::MatchRegisterName(StringRef Name) { - if (Name == "r0" || Name == "R0") - return 0; - else if (Name == "r1" || Name == "R1") - return 1; - else if (Name == "r2" || Name == "R2") - return 2; - else if (Name == "r3" || Name == "R3") - return 3; - else if (Name == "r3" || Name == "R3") - return 3; - else if (Name == "r4" || Name == "R4") - return 4; - else if (Name == "r5" || Name == "R5") - return 5; - else if (Name == "r6" || Name == "R6") - return 6; - else if (Name == "r7" || Name == "R7") - return 7; - else if (Name == "r8" || Name == "R8") - return 8; - else if (Name == "r9" || Name == "R9") - return 9; - else if (Name == "r10" || Name == "R10") - return 10; - else if (Name == "r11" || Name == "R11" || Name == "fp") - return 11; - else if (Name == "r12" || Name == "R12" || Name == "ip") - return 12; - else if (Name == "r13" || Name == "R13" || Name == "sp") - return 13; - else if (Name == "r14" || Name == "R14" || Name == "lr") - return 14; - else if (Name == "r15" || Name == "R15" || Name == "pc") - return 15; - return -1; -} - -/// A hack to allow some testing, to be replaced by a real table gen version. -bool ARMAsmParser:: -MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst) { - ARMOperand &Op0 = *(ARMOperand*)Operands[0]; - assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); - StringRef Mnemonic = Op0.getToken(); - if (Mnemonic == "add" || - Mnemonic == "stmfd" || - Mnemonic == "str" || - Mnemonic == "ldmfd" || - Mnemonic == "ldr" || - Mnemonic == "mov" || - Mnemonic == "sub" || - Mnemonic == "bl" || - Mnemonic == "push" || - Mnemonic == "blx" || - Mnemonic == "pop") { - // Hard-coded to a valid instruction, till we have a real matcher. 
- Inst = MCInst(); - Inst.setOpcode(ARM::MOVr); - Inst.addOperand(MCOperand::CreateReg(2)); - Inst.addOperand(MCOperand::CreateReg(2)); - Inst.addOperand(MCOperand::CreateImm(0)); - Inst.addOperand(MCOperand::CreateImm(0)); - Inst.addOperand(MCOperand::CreateReg(0)); - return false; - } - - return true; -} - /// Parse an ARM instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { @@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { OwningPtr<ARMOperand> Op; - ARMOperand::CreateToken(Op, Name, NameLoc); - + + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + + // Determine the predicate, if any. + // + // FIXME: We need a way to check whether a prefix supports predication, + // otherwise we will end up with an ambiguity for instructions that happen to + // end with a predicate name. + unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2)) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); + if (CC != ~0U) { + Head = Head.slice(0, Head.size() - 2); + } else + CC = ARMCC::AL; + + ARMOperand::CreateToken(Op, Head, NameLoc); Operands.push_back(Op.take()); - if (getLexer().isNot(AsmToken::EndOfStatement)) { + ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc); + Operands.push_back(Op.take()); + + // Add the remaining tokens in the mnemonic. + while (Next != StringRef::npos) { + Start = Next; + Next = Name.find('.', Start + 1); + Head = Name.slice(Start, Next); + ARMOperand::CreateToken(Op, Head, NameLoc); + Operands.push_back(Op.take()); + } + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand.
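// A worked example of the suffix handling above (a sketch, not commit code):
// for Name == "moveq", Head.substr(Head.size()-2) is "eq", the StringSwitch
// maps it to ARMCC::EQ, and Head is trimmed to the bare mnemonic:
//
//   StringRef Head = "moveq";
//   unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
//                     .Case("eq", ARMCC::EQ).Default(~0U); // -> ARMCC::EQ
//   Head = Head.slice(0, Head.size() - 2);                 // -> "mov"
//
// This is also where the FIXME above bites: "teq" ends in "eq" as well, so
// the parser still needs per-mnemonic knowledge of which suffixes are
// really predicates.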
OwningPtr<ARMOperand> Op; if (ParseOperand(Op)) return true; @@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); LLVMInitializeARMAsmLexer(); } + +#include "ARMGenAsmMatcher.inc" diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index edc934549b288..8026e7718ca98 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && MI->getOperand(0).getReg() == ARM::SP) { const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { O << '\t' << "vpush"; printPredicateOperand(MI, 3, O); O << '\t'; @@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && MI->getOperand(0).getReg() == ARM::SP) { const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { O << '\t' << "vpop"; printPredicateOperand(MI, 3, O); O << '\t'; @@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, O << getRegisterName(MO1.getReg()); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) - << ' '; - + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); } } @@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, return; } - if (Modifier && strcmp(Modifier, "submode") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - O << ARM_AM::getAMSubModeStr(Mode); - return; - } else if (Modifier && strcmp(Modifier, "base") == 0) { - // Used for FSTM{D|S} and LSTM{D|S} operations. 
- O << getRegisterName(MO1.getReg()); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { @@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, assert(0 && "FIXME: Implement printAddrModePCOperand"); } -void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); int32_t lsb = CountTrailingZeros_32(v); @@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, O << '#' << lsb << ", #" << width; } +void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "{"; @@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, O << getRegisterName(Reg); // Print the shift opc. - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) - << " "; - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); } void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index ddf5047793d29..e5ad0d07e9bab 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -57,6 +57,8 @@ public: void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt index 4e299f86ecb67..18645c0864a32 100644 --- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -1,8 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMARMAsmPrinter - ARMAsmPrinter.cpp ARMInstPrinter.cpp - ARMMCInstLower.cpp ) add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 0df34666b959b..6b4dee5965d25 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums) tablegen(ARMGenInstrInfo.inc -gen-instr-desc) tablegen(ARMGenCodeEmitter.inc -gen-emitter) tablegen(ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher) tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenFastISel.inc -gen-fast-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen + ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp + ARMFastISel.cpp + ARMGlobalMerge.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCAsmInfo.cpp + ARMMCInstLower.cpp ARMRegisterInfo.cpp + ARMSelectionDAGInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp @@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp - ARMSelectionDAGInfo.cpp ) -target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) +target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4de697e8bf676..e22028985b46c 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +//#define DEBUG(X) do { X; } while (0) + /// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from /// ARMDecoderEmitter.cpp TableGen backend. It contains: /// @@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) { return ARM::BFI; } + // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2. + // As a result, the decoder fails to decode USAT properly. + if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) + return ARM::USAT; + // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. // As a result, the decoder fails to decode UMULL properly. if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { @@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) { // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. // As a result, the decoder fails to decode SSAT properly. if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) - return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr; + return ARM::SSAT; // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. @@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { /// decodeInstruction(insn) is invoked on the original insn. /// /// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) { +static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { if (IsThumb2) { uint16_t op1 = slice(insn, 28, 27); uint16_t op2 = slice(insn, 26, 20); @@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to // the top half followed by the second halfword. - uint32_t insn = 0; + unsigned insn = 0; // Possible second halfword. uint16_t insn1 = 0; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index a07ff2832aa7e..9f493b9aee02a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -20,6 +20,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +//#define DEBUG(X) do { X; } while (0) + /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s /// describing the operand info for each ARMInsts[i]. @@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, RegClassID = ARM::DPRRegClassID; } + // For this purpose, we can treat rGPR as if it were GPR. + if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID; + // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). unsigned RegNum = RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister; @@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) { // // A8-11: DecodeImmShift() static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) { - // If type == 0b11 and imm5 == 0, we have an rrx, instead. - if (ShOp == ARM_AM::ror && ShImm == 0) - ShOp = ARM_AM::rrx; - // If (lsr or asr) and imm5 == 0, shift amount is 32. 
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0) + if (ShImm != 0) + return; + switch (ShOp) { + case ARM_AM::no_shift: + case ARM_AM::rrx: + break; + case ARM_AM::lsl: + ShOp = ARM_AM::no_shift; + break; + case ARM_AM::lsr: + case ARM_AM::asr: ShImm = 32; + break; + case ARM_AM::ror: + ShOp = ARM_AM::rrx; + break; + } } // getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding @@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { - if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7) - return true; - assert(0 && "Unexpected pseudo instruction!"); return false; } @@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - assert(0 && "Unexpected BrMiscFrm Opcode"); return false; } @@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { return true; } -static inline bool SaturateOpcode(unsigned Opcode) { - switch (Opcode) { - case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16: - case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16: - return true; - default: - return false; - } -} - -static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - case ARM::SSATlsl: - case ARM::SSATasr: - return slice(insn, 20, 16) + 1; - case ARM::SSAT16: - return slice(insn, 19, 16) + 1; - case ARM::USATlsl: - case ARM::USATasr: - return slice(insn, 20, 16); - case ARM::USAT16: - return slice(insn, 19, 16); - default: - assert(0 && "Invalid opcode passed in"); - return 0; - } -} - // A major complication is the fact that some of the saturating add/subtract // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. @@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpIdx >= NumOps) return false; - // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd. - if (SaturateOpcode(Opcode)) { - MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { - OpIdx += 2; - return true; - } - - // For SSAT operand reg (Rm) has been disassembled above. - // Now disassemble the shift amount. - - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 11, 7); - - // A8.6.183. Possible ASR shift amount of 32... - if (Opcode == ARM::SSATasr && ShAmt == 0) - ShAmt = 32; - - MI.addOperand(MCOperand::CreateImm(ShAmt)); - - OpIdx += 3; - return true; - } - // Special-case handling of BFC/BFI/SBFX/UBFX. if (Opcode == ARM::BFC || Opcode == ARM::BFI) { - // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. - MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(0)); + if (Opcode == ARM::BFI) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); + ++OpIdx; + } uint32_t mask = 0; if (!getBFCInvMask(insn, mask)) return false; @@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 5-bit immediate field Inst{11-7}. 
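// Equivalently, slice(insn, 11, 7): per the comment above, ARMII::ShiftShift
// must be 7, so the next line computes (insn >> 7) & 0x1F. Sketch:
//
//   unsigned ShiftAmt = slice(insn, 11, 7); // same five bits, Inst{11-7}
//
// getImmShiftSE() then normalizes the (opcode, imm5) pair per the A8-11
// DecodeImmShift rules visible above: lsl #0 becomes no_shift, lsr/asr #0
// becomes #32, and ror #0 becomes rrx.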
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; - MI.addOperand(MCOperand::CreateImm(ShiftAmt)); + ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; + if (Opcode == ARM::PKHBT) + Opc = ARM_AM::lsl; + else if (Opcode == ARM::PKHTB) + Opc = ARM_AM::asr; + getImmShiftSE(Opc, ShiftAmt); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); ++OpIdx; } return true; } +/// DisassembleSatFrm - Disassemble saturate instructions: +/// SSAT, SSAT16, USAT, and USAT16. +static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + + const TargetInstrDesc &TID = ARMInsts[Opcode]; + NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + + // Disassemble register def. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + + unsigned Pos = slice(insn, 20, 16); + if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16) + Pos += 1; + MI.addOperand(MCOperand::CreateImm(Pos)); + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRm(insn)))); + + if (NumOpsAdded == 4) { + ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl); + // Inst{11-7} encodes the imm5 shift amount. + unsigned ShAmt = slice(insn, 11, 7); + if (ShAmt == 0) { + // A8.6.183. Possible ASR shift amount of 32... + if (Opc == ARM_AM::asr) + ShAmt = 32; + else + Opc = ARM_AM::no_shift; + } + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); + } + return true; +} + // Extend instructions. // SXT* and UXT*: Rd [Rn] Rm [rot_imm]. // The 2nd operand register is Rn and the 3rd operand register is Rm for the @@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); - bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false; + bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd for operand 0. @@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Multiple Instructions. // This is similar to the algorithm for LDM/STM in that operand 0 (the base) and -// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is +// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is // followed by a reglist of either DPR(s) or SPR(s). // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] @@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); - // Next comes the AM5 Opcode. + // Next comes the AM4 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); // Must be either "ia" or "db" submode. if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { - DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n"); return false; } - - unsigned char Imm8 = insn & 0xFF; - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); // Handling the two predicate operands before the reglist.
int64_t CondVal = insn >> ARMII::CondShift; @@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx += 4; bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD || - Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false; + Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd. unsigned RegD = decodeVFPRd(insn, isSPVFP); // Fill the variadic part of reglist. + unsigned char Imm8 = insn & 0xFF; unsigned Regs = isSPVFP ? Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, @@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // We have homogeneous NEON registers for Load/Store. unsigned RegClass = 0; + bool DRegPair = UseDRegPair(Opcode); // Double-spaced registers have increments of 2. - unsigned Inc = DblSpaced ? 2 : 1; + unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1; unsigned Rn = decodeRn(insn); unsigned Rm = decodeRm(insn); @@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, - UseDRegPair(Opcode)))); + getRegisterEnum(B, RegClass, Rd, DRegPair))); Rd += Inc; ++OpIdx; } @@ -2312,8 +2311,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, - UseDRegPair(Opcode)))); + getRegisterEnum(B, RegClass, Rd, DRegPair))); Rd += Inc; ++OpIdx; } @@ -2351,6 +2349,11 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } } + // Accessing registers past the end of the NEON register file is not + // defined. + if (Rd > 32) + return false; + return true; } @@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, break; case ARM::VMOVv4i16: case ARM::VMOVv8i16: + case ARM::VMVNv4i16: + case ARM::VMVNv8i16: esize = ESize16; break; case ARM::VMOVv2i32: case ARM::VMOVv4i32: + case ARM::VMVNv2i32: + case ARM::VMVNv4i32: esize = ESize32; break; case ARM::VMOVv1i64: @@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.49 ISB static inline bool MemBarrierInstr(uint32_t insn) { unsigned op7_4 = slice(insn, 7, 4); - if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6)) + if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6)) return true; return false; @@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if (MemBarrierInstr(insn)) + if (MemBarrierInstr(insn)) { + // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care + // of within the generic ARMBasicMCBuilder::BuildIt() method. + // + // Inst{3-0} encodes the memory barrier option for the variants. 
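// slice(insn, 3, 0) is simply insn & 0xF, the 4-bit barrier-option field.
// As a worked example (option encoding per my reading of the ARM ARM, so
// treat it as illustrative): a full-system barrier, "sy", encodes option
// 0b1111, and the operand added below would be
//
//   MI.addOperand(MCOperand::CreateImm(0xF)); // printed via MemBOptToString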
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); + NumOpsAdded = 1; return true; + } switch (Opcode) { case ARM::CLREX: @@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = { &DisassembleLdStMulFrm, &DisassembleLdStExFrm, &DisassembleArithMiscFrm, + &DisassembleSatFrm, &DisassembleExtFrm, &DisassembleVFPUnaryFrm, &DisassembleVFPBinaryFrm, diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 7d21256a14f9f..9c30d332d1f20 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -23,7 +23,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetInstrInfo.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { @@ -53,36 +54,35 @@ public: ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \ ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \ ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \ - ENTRY(ARM_FORMAT_EXTFRM, 13) \ - ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \ - ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \ - ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \ - ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \ - ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \ - ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \ - ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \ - ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \ - ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \ - ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \ - ENTRY(ARM_FORMAT_THUMBFRM, 24) \ - ENTRY(ARM_FORMAT_NEONFRM, 25) \ - ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \ - ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \ - ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \ - ENTRY(ARM_FORMAT_MISCFRM, 29) \ - ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \ - ENTRY(ARM_FORMAT_NLdSt, 31) \ - ENTRY(ARM_FORMAT_N1RegModImm, 32) \ - ENTRY(ARM_FORMAT_N2Reg, 33) \ - ENTRY(ARM_FORMAT_NVCVT, 34) \ - ENTRY(ARM_FORMAT_NVecDupLn, 35) \ - ENTRY(ARM_FORMAT_N2RegVecShL, 36) \ - ENTRY(ARM_FORMAT_N2RegVecShR, 37) \ - ENTRY(ARM_FORMAT_N3Reg, 38) \ - ENTRY(ARM_FORMAT_N3RegVecSh, 39) \ - ENTRY(ARM_FORMAT_NVecExtract, 40) \ - ENTRY(ARM_FORMAT_NVecMulScalar, 41) \ - ENTRY(ARM_FORMAT_NVTBL, 42) + ENTRY(ARM_FORMAT_SATFRM, 13) \ + ENTRY(ARM_FORMAT_EXTFRM, 14) \ + ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \ + ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \ + ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \ + ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \ + ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \ + ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \ + ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \ + ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \ + ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \ + ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \ + ENTRY(ARM_FORMAT_THUMBFRM, 25) \ + ENTRY(ARM_FORMAT_MISCFRM, 26) \ + ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \ + ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \ + ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \ + ENTRY(ARM_FORMAT_NLdSt, 30) \ + ENTRY(ARM_FORMAT_N1RegModImm, 31) \ + ENTRY(ARM_FORMAT_N2Reg, 32) \ + ENTRY(ARM_FORMAT_NVCVT, 33) \ + ENTRY(ARM_FORMAT_NVecDupLn, 34) \ + ENTRY(ARM_FORMAT_N2RegVecShL, 35) \ + ENTRY(ARM_FORMAT_N2RegVecShR, 36) \ + ENTRY(ARM_FORMAT_N3Reg, 37) \ + ENTRY(ARM_FORMAT_N3RegVecSh, 38) \ + ENTRY(ARM_FORMAT_NVecExtract, 39) \ + ENTRY(ARM_FORMAT_NVecMulScalar, 40) \ + ENTRY(ARM_FORMAT_NVTBL, 41) // ARM instruction format specifies the encoding used by the instruction. #define ENTRY(n, v) n = v, @@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) { } /// Utility function for setting [From, To] bits to Val for a uint32_t. 
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To, - uint32_t Val) { +static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, + unsigned Val) { assert(From < 32 && To < 32 && From >= To); uint32_t Mask = ((1 << (From - To + 1)) - 1); Bits &= ~(Mask << To); diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4b7a0bf6fdb91..112817b13cf90 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) { } static inline bool IsGPR(unsigned RegClass) { - return RegClass == ARM::GPRRegClassID; + return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID; } // Utilities for 32-bit Thumb instructions. @@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5, switch (bits2) { default: assert(0 && "No such value"); case 0: - ShOp = ARM_AM::lsl; + ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl); return imm5; case 1: ShOp = ARM_AM::lsr; @@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[1].RegClass == ARM::GPRRegClassID && OpInfo[2].RegClass < 0 && OpInfo[3].RegClass < 0 - && "Exactlt 4 operands expect and first two as reg operands"); + && "Exactly 4 operands expected and first two as reg operands"); // Only need to populate the src reg operand. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); @@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID + && (OpInfo[0].RegClass == ARM::GPRRegClassID || + OpInfo[0].RegClass == ARM::rGPRRegClassID) + && (OpInfo[1].RegClass == ARM::GPRRegClassID || + OpInfo[1].RegClass == ARM::rGPRRegClassID) && "Expect >= 2 operands and first two as reg operands"); - bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID); + bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID || + OpInfo[2].RegClass == ARM::rGPRRegClassID)); bool NoDstReg = (decodeRs(insn) == 0xF); // Build the register operands, followed by the constant shift specifier. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, + getRegisterEnum(B, OpInfo[0].RegClass, NoDstReg ?
decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MI.getOperand(Idx)); ++OpIdx; } else if (!NoDstReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass, decodeRn(insn)))); ++OpIdx; } else { @@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, } } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRm(insn)))); ++OpIdx; @@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned imm5 = getShiftAmtBits(insn); ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift; unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp); - - // PKHBT/PKHTB are special in that we need the decodeImmShift() call to - // decode the shift amount from raw imm5 and bits2, but we DO NOT need - // to encode the ShOp, as it's in the asm string already. - if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB) - MI.addOperand(MCOperand::CreateImm(ShAmt)); - else - MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); } ++OpIdx; } @@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, OpIdx = 0; - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID + unsigned RdRegClassID = OpInfo[0].RegClass; + assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || + RdRegClassID == ARM::rGPRRegClassID) && "Expect >= 2 operands and first one as reg operand"); - bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + unsigned RnRegClassID = OpInfo[1].RegClass; + bool TwoReg = (RnRegClassID == ARM::GPRRegClassID + || RnRegClassID == ARM::rGPRRegClassID); bool NoDstReg = (decodeRs(insn) == 0xF); // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, + getRegisterEnum(B, RdRegClassID, NoDstReg ? 
decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); return false; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, static inline bool Thumb2SaturateOpcode(unsigned Opcode) { switch (Opcode) { - case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16: - case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16: + case ARM::t2SSAT: case ARM::t2SSAT16: + case ARM::t2USAT: case ARM::t2USAT16: return true; default: return false; } } -static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { - switch (Opcode) { - case ARM::t2SSATlsl: - case ARM::t2SSATasr: - return slice(insn, 4, 0) + 1; - case ARM::t2SSAT16: - return slice(insn, 3, 0) + 1; - case ARM::t2USATlsl: - case ARM::t2USATasr: - return slice(insn, 4, 0); - case ARM::t2USAT16: - return slice(insn, 3, 0); - default: - assert(0 && "Unexpected opcode"); - return 0; +/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions: +/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt +/// o t2SSAT16, t2USAT16: Rs sat_pos Rn +static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, + unsigned &NumOpsAdded, BO B) { + const TargetInstrDesc &TID = ARMInsts[Opcode]; + NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands + + // Disassemble the register def. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + decodeRs(insn)))); + + unsigned Pos = slice(insn, 4, 0); + if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16) + Pos += 1; + MI.addOperand(MCOperand::CreateImm(Pos)); + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + decodeRn(insn)))); + + if (NumOpsAdded == 4) { + ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ? + ARM_AM::asr : ARM_AM::lsl); + // Inst{14-12:7-6} encodes the imm5 shift amount. 
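// Worked example of reassembling the split field: with imm3 = Inst{14-12} =
// 0b101 and imm2 = Inst{7-6} = 0b10, the next line computes
//
//   (0b101 << 2) | 0b10 == 0b10110 == 22
//
// i.e. a shift amount of 22.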
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); + if (ShAmt == 0) { + if (Opc == ARM_AM::asr) + ShAmt = 32; + else + Opc = ARM_AM::no_shift; + } + MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt))); } + return true; } // A6.3.3 Data-processing (plain binary immediate) @@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SBFX (SBFX): Rs Rn lsb width // o t2UBFX (UBFX): Rs Rn lsb width // o t2BFI (BFI): Rs Rn lsb width -// -// [Signed|Unsigned] Saturate [16] -// -// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt -// o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, OpIdx = 0; - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID + unsigned RdRegClassID = OpInfo[0].RegClass; + assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID || + RdRegClassID == ARM::rGPRRegClassID) && "Expect >= 2 operands and first one as reg operand"); - bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + unsigned RnRegClassID = OpInfo[1].RegClass; + bool TwoReg = (RnRegClassID == ARM::GPRRegClassID + || RnRegClassID == ARM::rGPRRegClassID); // Build the register operand(s), followed by the immediate(s). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID, decodeRs(insn)))); ++OpIdx; - // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd. - if (Thumb2SaturateOpcode(Opcode)) { - MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { - OpIdx += 2; - return true; - } - - // For SSAT operand reg (Rn) has been disassembled above. - // Now disassemble the shift amount. - - // Inst{14-12:7-6} encodes the imm5 shift amount. - unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); - - MI.addOperand(MCOperand::CreateImm(ShAmt)); - - OpIdx += 3; - return true; - } - if (TwoReg) { assert(NumOps >= 3 && "Expect >= 3 operands"); int Idx; @@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, decodeRn(insn)))); } ++OpIdx; } + if (Opcode == ARM::t2BFI) { + // Add val reg operand. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, + decodeRn(insn)))); + ++OpIdx; + } + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) { + else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1575,17 +1575,10 @@ else { // Handle the case of: lsb width - assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Unexpected opcode"); + assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX) + && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); - if (Opcode == ARM::t2BFI) { - if (getMsb(insn) < getLsb(insn)) { - DEBUG(errs() << "Encoding error: msb < lsb\n"); - return false; - } - MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); - } else - MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); + MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); ++OpIdx; } @@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // A8.6.26 // t2BXJ -> Rn // -// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants), -// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX +// Miscellaneous control: t2DMBsy (and its t2DMB variants), +// t2DSBsy (and its t2DSB variants), t2ISBsy, t2CLREX // -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) // // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV @@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && "Expect >= 2 operands and first two as reg operands"); // Build the register operands, followed by the optional rotation amount.
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - OpInfo[2].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[2].RegClass == ARM::rGPRRegClassID && "Expect >= 3 operands and first three as reg operands"); // Build the register operands. - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 4 : 3; @@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && - OpInfo[2].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass == ARM::rGPRRegClassID && + OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[2].RegClass == ARM::rGPRRegClassID && "Expect >= 3 operands and first three as reg operands"); - bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID; // Build the register operands. 
if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, break; case 2: if (op == 0) { - if (slice(op2, 5, 5) == 0) { + if (slice(op2, 5, 5) == 0) // Data-processing (modified immediate) return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, B); - } else { - // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } - } else { - // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, - B); - } + if (Thumb2SaturateOpcode(Opcode)) + return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B); - break; + // Data-processing (plain binary immediate) + return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); + } + // Branches and miscellaneous control on page A6-20. + return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 3: switch (slice(op2, 6, 5)) { case 0: diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 9e3ff29e07c42..b3fcfaf6bda7d 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -14,10 +14,11 @@ TARGET = ARM # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ - ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ + ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenDecoderTables.inc ARMGenEDInfo.inc + ARMGenDecoderTables.inc ARMGenEDInfo.inc \ + ARMGenFastISel.inc DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index bbdd3c7f7c3e0..97e54bfaed9e7 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted"); namespace { struct NEONMoveFixPass : public MachineFunctionPass { static char ID; - NEONMoveFixPass() : MachineFunctionPass(&ID) {} + NEONMoveFixPass() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &Fn); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index f67717cdd56f5..3407ac6fe08ec 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -23,7 +23,7 @@ namespace { public: static char ID; - NEONPreAllocPass() : MachineFunctionPass(&ID) {} + NEONPreAllocPass() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF); @@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, default: break; - case ARM::VLD1q8: - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD2d8: - case ARM::VLD2d16: - case ARM::VLD2d32: case ARM::VLD2LNd8: case ARM::VLD2LNd16: case ARM::VLD2LNd32: @@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - FirstOpnd = 0; - NumRegs = 4; - return true; - case ARM::VLD2LNq16: case ARM::VLD2LNq32: FirstOpnd = 0; @@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD3d8: - case ARM::VLD3d16: - case ARM::VLD3d32: - case ARM::VLD1d64T: case ARM::VLD3LNd8: case ARM::VLD3LNd16: case ARM::VLD3LNd32: @@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8_UPD: - case ARM::VLD3q16_UPD: - case ARM::VLD3q32_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD3q8odd_UPD: - case ARM::VLD3q16odd_UPD: - case ARM::VLD3q32odd_UPD: - FirstOpnd = 0; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD3LNq16: case ARM::VLD3LNq32: FirstOpnd = 0; @@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VLD4d8: - case ARM::VLD4d16: - case ARM::VLD4d32: - case ARM::VLD1d64Q: case ARM::VLD4LNd8: case ARM::VLD4LNd16: case ARM::VLD4LNd32: @@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8_UPD: - case ARM::VLD4q16_UPD: - case ARM::VLD4q32_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD4q8odd_UPD: - case ARM::VLD4q16odd_UPD: - case ARM::VLD4q32odd_UPD: - FirstOpnd = 0; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VLD4LNq16: case ARM::VLD4LNq32: FirstOpnd = 0; @@ 
-178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: @@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 2; return true; - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - FirstOpnd = 2; - NumRegs = 4; - return true; - case ARM::VST2LNq16: case ARM::VST2LNq32: FirstOpnd = 2; @@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST3d8: - case ARM::VST3d16: - case ARM::VST3d32: - case ARM::VST1d64T: case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: @@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VST3q8_UPD: - case ARM::VST3q16_UPD: - case ARM::VST3q32_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST3q8odd_UPD: - case ARM::VST3q16odd_UPD: - case ARM::VST3q32odd_UPD: - FirstOpnd = 4; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - case ARM::VST3LNq16: case ARM::VST3LNq32: FirstOpnd = 2; @@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, Stride = 2; return true; - case ARM::VST4d8: - case ARM::VST4d16: - case ARM::VST4d32: - case ARM::VST1d64Q: case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: @@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VST4q8_UPD: - case ARM::VST4q16_UPD: - case ARM::VST4q32_UPD: - FirstOpnd = 4; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST4q8odd_UPD: - case ARM::VST4q16odd_UPD: - case ARM::VST4q32odd_UPD: - FirstOpnd = 4; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - case ARM::VST4LNq16: case ARM::VST4LNq32: FirstOpnd = 2; @@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { continue; if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; - llvm_unreachable("expected a REG_SEQUENCE"); + + MachineBasicBlock::iterator NextI = llvm::next(MBBI); + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + // For now, just assign a fixed set of adjacent registers. + // This leaves plenty of room for future improvements. + static const unsigned NEONDRegs[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 + }; + MO.setReg(NEONDRegs[Offset + R * Stride]); + + if (MO.isUse()) { + // Insert a copy from VirtReg. + BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg()) + .addReg(VirtReg, getKillRegState(MO.isKill())); + MO.setIsKill(); + } else if (MO.isDef() && !MO.isDead()) { + // Add a copy to VirtReg. 
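Before the def-copy that follows, a quick worked example of the fixed-window assignment above; the operand parameters are invented, only the indexing formula comes from the code.

// NEONDRegs[Offset + R * Stride] picks the physical D register for the
// R-th operand. With NumRegs = 3, Offset = 1, Stride = 2 the operands map
// to NEONDRegs[1], NEONDRegs[3], NEONDRegs[5], i.e. D1, D3, D5; with the
// common contiguous case (Offset = 0, Stride = 1) they map to D0, D1, D2.
static inline unsigned fixedDRegIndex(unsigned R, unsigned Offset,
                                      unsigned Stride) {
  return Offset + R * Stride;          // index into the NEONDRegs window
}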
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg) + .addReg(MO.getReg()); + } + } } return Modified; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 0cb8ff01181d7..9fc3fb92cb2c1 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that. //===---------------------------------------------------------------------===// -Given the following on ARMv7: -int test1(int A, int B) { - return (A&-8388481)|(B&8388480); -} - -We currently generate: - bfc r0, #7, #16 - movw r2, #:lower16:8388480 - movt r2, #:upper16:8388480 - and r1, r1, r2 - orr r0, r1, r0 - bx lr - -The following is much shorter: - lsr r1, r1, #7 - bfi r0, r1, #7, #16 - bx lr - - -//===---------------------------------------------------------------------===// - The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal: int a(int x) { return __builtin_bswap32(x); } @@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic version. //===---------------------------------------------------------------------===// + +Consider the following simple C code: + +void foo(unsigned char *a, unsigned char *b, int *c) { + if ((*a | *b) == 0) *c = 0; +} + +currently llvm-gcc generates something like this (nice branchless code I'd say): + + ldrb r0, [r0] + ldrb r1, [r1] + orr r0, r1, r0 + tst r0, #255 + moveq r0, #0 + streq r0, [r2] + bx lr + +Note that both "tst" and "moveq" are redundant. + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 39b70b43b23f5..a21a3da10bdad 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); // It's not always a good idea to include the call frame as part of the @@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) { MI.RemoveOperand(Op); } -int Thumb1RegisterInfo:: -rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const -{ - // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo - // version then can pull out Thumb1 specific parts here - return 0; -} - -/// saveScavengerRegister - Spill the register so it can be used by the -/// register scavenger. Return true. -bool -Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator &UseMI, - const TargetRegisterClass *RC, - unsigned Reg) const { - // Thumb1 can't use the emergency spill slot on the stack because - // ldr/str immediate offsets must be positive, and if we're referencing - // off the frame pointer (if, for example, there are alloca() calls in - // the function, the offset will be negative. Use R12 instead since that's - // a call clobbered register that we know won't be used in Thumb1 mode. - DebugLoc DL; - BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). 
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); - - // The UseMI is where we would like to restore the register. If there's - // interference with R12 before then, however, we'll need to restore it - // before that instead and adjust the UseMI. - bool done = false; - for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { - if (II->isDebugValue()) - continue; - // If this instruction affects R12, adjust our restore point. - for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = II->getOperand(i); - if (!MO.isReg() || MO.isUndef() || !MO.getReg() || - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - if (MO.getReg() == ARM::R12) { - UseMI = II; - done = true; - break; - } - } - } - // Restore the register from R12 - BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). - addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); - - return true; -} - -unsigned -Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const{ - unsigned VReg = 0; - unsigned i = 0; +bool Thumb1RegisterInfo:: +rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MF.getFrameInfo()->getStackSize() + SPAdj; - - if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); - // There are alloca()'s in this function, must reference off the frame - // pointer instead. - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - } - - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; - } - unsigned Opcode = MI.getOpcode(); const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); if (Opcode == ARM::tADDrSPi) { - Offset += MI.getOperand(i+1).getImm(); + Offset += MI.getOperand(FrameRegIdx+1).getImm(); // Can't use tADDrSPi if it's based off the frame pointer. unsigned NumBits = 0; @@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { // Turn it into a move. MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); - MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset and remaining explicit predicate operands. 
- do MI.RemoveOperand(i+1); - while (MI.getNumOperands() > i+1 && - (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm())); - return 0; + do MI.RemoveOperand(FrameRegIdx+1); + while (MI.getNumOperands() > FrameRegIdx+1 && + (!MI.getOperand(FrameRegIdx+1).isReg() || + !MI.getOperand(FrameRegIdx+1).isImm())); + return true; } // Common case: small offset, fits into instruction. @@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (((Offset / Scale) & ~Mask) == 0) { // Replace the FrameIndex with sp / fp if (Opcode == ARM::tADDi3) { - removeOperands(MI, i); + removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) .addImm(Offset / Scale)); } else { - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); } - return 0; + return true; } unsigned DestReg = MI.getOperand(0).getReg(); @@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, *this, dl); MBB.erase(II); - return 0; + return true; } if (Offset > 0) { @@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // r0 = add sp, 255*4 // r0 = add r0, (imm - 255*4) if (Opcode == ARM::tADDi3) { - removeOperands(MI, i); + removeOperands(MI, FrameRegIdx); MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); } else { - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Mask); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); } Offset = (Offset - Mask * Scale); MachineBasicBlock::iterator NII = llvm::next(II); @@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); if (Opcode == ARM::tADDi3) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } } - return 0; + return true; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Scale = 1; switch (AddrMode) { case ARMII::AddrModeT1_s: { - ImmIdx = i+1; + ImmIdx = FrameRegIdx+1; InstrOffs = MI.getOperand(ImmIdx).getImm(); NumBits = (FrameReg == ARM::SP) ? 
8 : 5; Scale = 4; @@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Mask = (1 << NumBits) - 1; if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with sp - MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); - return 0; + return true; } bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; @@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset &= ~(Mask*Scale); } } + return Offset == 0; +} + +void +Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + MachineInstr &MI = *I; + int Off = Offset; // ARM doesn't need the general 64-bit offsets + unsigned i = 0; + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + bool Done = false; + Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII); + assert (Done && "Unable to resolve frame index!"); +} + +/// saveScavengerRegister - Spill the register so it can be used by the +/// register scavenger. Return true. +bool +Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + // Thumb1 can't use the emergency spill slot on the stack because + // ldr/str immediate offsets must be positive, and if we're referencing + // off the frame pointer (if, for example, there are alloca() calls in + // the function), the offset will be negative. Use R12 instead since that's + // a call clobbered register that we know won't be used in Thumb1 mode. + DebugLoc DL; + BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). + addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + + // The UseMI is where we would like to restore the register. If there's + // interference with R12 before then, however, we'll need to restore it + // before that instead and adjust the UseMI. + bool done = false; + for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + if (II->isDebugValue()) + continue; + // If this instruction affects R12, adjust our restore point. + for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = II->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (MO.getReg() == ARM::R12) { + UseMI = II; + done = true; + break; + } + } + } + // Restore the register from R12 + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); + + return true; +} + +void +Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + unsigned VReg = 0; + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + // There are alloca()'s in this function, must reference off the frame + // pointer or base pointer instead. + if (!hasBasePointer(MF)) { + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } else + FrameReg = BasePtr; + } + + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return; + } + + // Modify MI as necessary to handle as much of 'Offset' as possible + assert(AFI->isThumbFunction() && + "This eliminateFrameIndex only supports Thumb1!"); + if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII)) + return; // If we get here, the immediate doesn't fit into the instruction. We folded // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert(Offset && "This code isn't needed if offset already handled!"); + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) @@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); - assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); bool UseRR = false; - bool TrackVReg = true; - Value->first = FrameReg; // use the frame register as a kind indicator - Value->second = Offset; if (Opcode == ARM::tSpill) { if (FrameReg == ARM::SP) @@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); UseRR = true; - TrackVReg = false; } } else emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, @@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); else // tSTR has an extra register operand. 
MI.addOperand(MachineOperand::CreateReg(0, false)); - if (!ReuseFrameIndexVals || !TrackVReg) - VReg = 0; } else assert(false && "Unexpected opcode!"); @@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } - return VReg; } void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { @@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { dl = MBBI->getDebugLoc(); } - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { + // Adjust FP so it points to the stack slot that contains the previous FP. + if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); + AFI->setShouldRestoreSPFromFP(true); } // Determine starting offsets of spill areas. @@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); } - if (STI.isTargetELF() && hasFP(MF)) { + if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); - } AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (hasBasePointer(MF)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { @@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize()); - if (hasFP(MF)) { + if (AFI->shouldRestoreSPFromFP()) { NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 9a0308afa20c2..c578054a5d71f 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,27 +38,27 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Return the offset that - // could not be handled directly in MI. - int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, - unsigned SUBriOpc) const; - + // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be + // however much remains to be handled. Return 'true' if no further + // work is required.
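A self-contained analogue of that contract, assuming a hypothetical 8-bit immediate scaled by 4 (the sp-relative Thumb1 case); the helper name and constants are illustrative, not the real API.

#include <algorithm>
#include <cstdio>

// Fold as much of a non-negative Offset into the immediate as fits;
// shrink Offset to the residue and report whether the caller is done.
static bool foldOffset(int &Offset, unsigned NumBits, unsigned Scale) {
  int MaxImm = int(((1u << NumBits) - 1) * Scale);   // largest encodable part
  int Chunk = std::min(Offset - Offset % int(Scale), MaxImm);
  Offset -= Chunk;
  return Offset == 0;              // true -> no further work is required
}

int main() {
  int Off = 1028;                  // 255*4 = 1020 fits; 8 bytes remain
  bool Done = foldOffset(Off, 8, 4);
  std::printf("done=%d residue=%d\n", Done, Off);    // done=0 residue=8
  return 0;
}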
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) const; + void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const; bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC, unsigned Reg) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index cd15bbed9f23e..45e693744b806 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -27,7 +27,7 @@ namespace { public: static char ID; - Thumb2ITBlockPass() : MachineFunctionPass(&ID) {} + Thumb2ITBlockPass() : MachineFunctionPass(ID) {} const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; @@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI, } } +static bool isCopy(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + return false; + case ARM::MOVr: + case ARM::MOVr_TC: + case ARM::tMOVr: + case ARM::tMOVgpr2tgpr: + case ARM::tMOVtgpr2gpr: + case ARM::tMOVgpr2gpr: + case ARM::t2MOVr: + return true; + } +} + bool Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, ARMCC::CondCodes CC, ARMCC::CondCodes OCC, SmallSet<unsigned, 4> &Defs, SmallSet<unsigned, 4> &Uses) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert(SrcSubIdx == 0 && DstSubIdx == 0 && - "Sub-register indices still around?"); - // llvm models select's as two-address instructions. That means a copy - // is inserted before a t2MOVccr, etc. If the copy is scheduled in - // between selects we would end up creating multiple IT blocks. - - // First check if it's safe to move it. - if (Uses.count(DstReg) || Defs.count(SrcReg)) - return false; - - // Then peek at the next instruction to see if it's predicated on CC or OCC. - // If not, then there is nothing to be gained by moving the copy. - MachineBasicBlock::iterator I = MI; ++I; - MachineBasicBlock::iterator E = MI->getParent()->end(); - while (I != E && I->isDebugValue()) - ++I; - if (I != E) { - unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); - if (NCC == CC || NCC == OCC) - return true; - } + if (!isCopy(MI)) + return false; + // llvm models selects as two-address instructions. That means a copy + // is inserted before a t2MOVccr, etc. If the copy is scheduled in + // between selects we would end up creating multiple IT blocks. + assert(MI->getOperand(0).getSubReg() == 0 && + MI->getOperand(1).getSubReg() == 0 && + "Sub-register indices still around?"); + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + // First check if it's safe to move it. + if (Uses.count(DstReg) || Defs.count(SrcReg)) + return false; + + // Then peek at the next instruction to see if it's predicated on CC or OCC. + // If not, then there is nothing to be gained by moving the copy.
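The safety half of that test can be stated in isolation; a minimal sketch (the helper function is invented, the SmallSet usage mirrors the pass).

#include "llvm/ADT/SmallSet.h"

// Hoisting the copy above the predicated instructions already scanned is
// legal only if none of them reads its destination or writes its source.
static bool safeToHoistCopy(unsigned DstReg, unsigned SrcReg,
                            const llvm::SmallSet<unsigned, 4> &Defs,
                            const llvm::SmallSet<unsigned, 4> &Uses) {
  return !Uses.count(DstReg) && !Defs.count(SrcReg);
}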
+ MachineBasicBlock::iterator I = MI; ++I; + MachineBasicBlock::iterator E = MI->getParent()->end(); + while (I != E && I->isDebugValue()) + ++I; + if (I != E) { + unsigned NPredReg = 0; + ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + if (NCC == CC || NCC == OCC) + return true; } return false; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index ee517279c9d79..442f41da8a2d9 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index ba392f36d9464..0c3962dd123d8 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -173,7 +173,7 @@ namespace { char Thumb2SizeReduce::ID = 0; } -Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { +Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) @@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) return false; + // For the non-writeback version (this one), the base register must be + // one of the registers being loaded. 
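The reason for that restriction: 16-bit tLDMIA always writes the base register back unless the base also appears in the destination list, in which case the reload supersedes the writeback. So a 32-bit non-writeback t2LDMIA can only be narrowed when the base is itself reloaded. An illustrative refactoring of the operand scan that follows (the operand-index assumption comes from the code below):

#include "llvm/CodeGen/MachineInstr.h"

// Operands from index 4 onward are the loaded registers in this encoding.
static bool baseRegIsReloaded(const llvm::MachineInstr *MI, unsigned BaseReg) {
  for (unsigned i = 4, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == BaseReg)
      return true;                 // forced writeback is unobservable
  return false;
}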
+ bool isOK = false; + for (unsigned i = 4; i < MI->getNumOperands(); ++i) { + if (MI->getOperand(i).getReg() == BaseReg) { + isOK = true; + break; + } + } + if (!isOK) + return false; + OpNum = 0; isLdStMul = true; break; diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp index 001656e0121a7..3768117095369 100644 --- a/lib/Target/Alpha/AlphaBranchSelector.cpp +++ b/lib/Target/Alpha/AlphaBranchSelector.cpp @@ -22,7 +22,7 @@ using namespace llvm; namespace { struct AlphaBSel : public MachineFunctionPass { static char ID; - AlphaBSel() : MachineFunctionPass(&ID) {} + AlphaBSel() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &Fn); diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index a6c6f52704f6b..3aec07035d74c 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -34,7 +34,7 @@ namespace { public: static char ID; - AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID), + AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID), MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index d526dc0827b26..d197bd15ef9c5 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -113,8 +113,8 @@ namespace { static uint64_t getNearPower2(uint64_t x) { if (!x) return 0; unsigned at = CountLeadingZeros_64(x); - uint64_t complow = 1 << (63 - at); - uint64_t comphigh = 1 << (64 - at); + uint64_t complow = 1ULL << (63 - at); + uint64_t comphigh = 1ULL << (64 - at); //cerr << x << ":" << complow << ":" << comphigh << "\n"; if (abs64(complow - x) <= abs64(comphigh - x)) return complow; diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index ad625a2694172..5a2f5610fdb49 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -27,32 +27,6 @@ AlphaInstrInfo::AlphaInstrInfo() RI(*this) { } -bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, unsigned& destReg, - unsigned& SrcSR, unsigned& DstSR) const { - unsigned oc = MI.getOpcode(); - if (oc == Alpha::BISr || - oc == Alpha::CPYSS || - oc == Alpha::CPYST || - oc == Alpha::CPYSSt || - oc == Alpha::CPYSTs) { - // or r1, r2, r2 - // cpys(s|t) r1 r2 r2 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid Alpha BIS instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - SrcSR = DstSR = 0; - return true; - } - } - return false; -} - unsigned AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index e20e8323b64e6..ee6077a4a01a1 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -30,12 +30,6 @@ public: /// virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. 
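Returning to the getNearPower2 fix above: a standalone illustration of why the 1ULL promotion matters (the sample value is invented).

#include <stdio.h>
#include <stdint.h>

int main() {
  unsigned at = 20;                   // e.g. a CountLeadingZeros_64 result
  // uint64_t bad = 1 << (63 - at);   // undefined: 32-bit int shifted by 43
  uint64_t good = 1ULL << (63 - at);  // well-defined 64-bit shift
  printf("%llu\n", (unsigned long long)good);   // 2^43 = 8796093022208
  return 0;
}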
- virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; virtual unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp index 34be470f03e30..85fbfd1affe21 100644 --- a/lib/Target/Alpha/AlphaLLRP.cpp +++ b/lib/Target/Alpha/AlphaLLRP.cpp @@ -39,7 +39,7 @@ namespace { static char ID; AlphaLLRPPass(AlphaTargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm) { } + : MachineFunctionPass(ID), TM(tm) { } virtual const char *getPassName() const { return "Alpha NOP inserter"; diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index dc9d935ec047b..327ddb4d9a720 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -137,10 +137,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, //variable locals //<- SP -unsigned +void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -185,7 +184,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else { MI.getOperand(i).ChangeToImmediate(Offset); } - return 0; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index f9fd87a637374..b164979a63119 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -38,9 +38,8 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 9f4aff6fd5f53..5428cb96173b2 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -53,8 +53,6 @@ namespace { void printOp(const MachineOperand &MO, raw_ostream &O); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printBaseOffsetPair(const MachineInstr *MI, int i, raw_ostream &O, - bool brackets=true); virtual void EmitFunctionBodyStart(); virtual void EmitFunctionBodyEnd(); void EmitStartOfAsmFile(Module &M); diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index a74d42d595496..e50d57a31b6ef 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -28,34 +28,6 @@ BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST) RI(ST, *this), Subtarget(ST) {} -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, - unsigned &DstReg, - unsigned &SrcSR, - unsigned &DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. 
- switch (MI.getOpcode()) { - case BF::MOVE: - case BF::MOVE_ncccc: - case BF::MOVE_ccncc: - case BF::MOVECC_zext: - case BF::MOVECC_nz: - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - case BF::SLL16i: - if (MI.getOperand(2).getImm()!=0) - return false; - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - default: - return false; - } -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index 6c3591707269f..fdc1029da5883 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -30,10 +30,6 @@ namespace llvm { /// always be able to get register info as well (through this method). virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; } - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 06e95de1587f0..a51831263e909 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -190,10 +190,9 @@ static unsigned findScratchRegister(MachineBasicBlock::iterator II, return Reg; } -unsigned +void BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -230,20 +229,20 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.setDesc(TII.get(isStore ? BF::STORE32p_uimm6m4 : BF::LOAD32p_uimm6m4)); - return 0; + return; } if (BaseReg == BF::FP && isUInt<7>(-Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32fp_nimm7m4 : BF::LOAD32fp_nimm7m4)); MI.getOperand(FIPos+1).setImm(-Offset); - return 0; + return; } if (isInt<18>(Offset)) { MI.setDesc(TII.get(isStore ? BF::STORE32p_imm18m4 : BF::LOAD32p_imm18m4)); - return 0; + return; } // Use RegScavenger to calculate proper offset... MI.dump(); @@ -328,7 +327,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, llvm_unreachable("Cannot eliminate frame index"); break; } - return 0; } void BlackfinRegisterInfo:: @@ -344,10 +342,6 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } -void BlackfinRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { -} - // Emit a prologue that sets up a stack frame. // On function entry, R0-R2 and P0 may hold arguments. 
// R3, P1, and P2 may be used as scratch registers diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index ead0b4a73c832..bb83c34f80032 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -51,15 +51,12 @@ namespace llvm { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index e8d8474b5be86..270fff6064adc 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -73,7 +73,7 @@ namespace { public: static char ID; CBackendNameAllUsedStructsAndMergeFunctions() - : ModulePass(&ID) {} + : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<FindUsedTypes>(); } @@ -110,7 +110,7 @@ namespace { public: static char ID; explicit CWriter(formatted_raw_ostream &o) - : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), + : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { FPCounter = 0; @@ -199,7 +199,6 @@ namespace { void lowerIntrinsics(Function &F); - void printModule(Module *M); void printModuleTypes(const TypeSymbolTable &ST); void printContainedStructs(const Type *Ty, std::set<const Type *> &); void printFloatingPointConstants(Function &F); @@ -1300,6 +1299,13 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { } std::string CWriter::GetValueName(const Value *Operand) { + + // Resolve potential alias. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) { + if (const Value *V = GA->resolveAliasedGlobal(false)) + Operand = V; + } + // Mangle globals with the standard mangler interface for LLC compatibility. 
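The alias hop added just above can be read as a small helper; a hedged sketch using the same calls as the patch (the wrapper name is invented). As used here, resolveAliasedGlobal(false) chases the alias chain and yields null when it cannot resolve it, in which case the original operand is kept:

#include "llvm/GlobalAlias.h"
#include "llvm/Support/Casting.h"

static const llvm::Value *resolveForNaming(const llvm::Value *Operand) {
  if (const llvm::GlobalAlias *GA = llvm::dyn_cast<llvm::GlobalAlias>(Operand))
    if (const llvm::Value *V = GA->resolveAliasedGlobal(false))
      return V;                     // name the aliasee, not the alias
  return Operand;
}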
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) { SmallString<128> Str; diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index ec2f663908f68..04fa2ae866d69 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -1,4 +1,4 @@ -//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===// +//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A> // Return Value Calling Convention //===----------------------------------------------------------------------===// -// Return-value convention for Cell SPU: Everything can be passed back via $3: +// Return-value convention for Cell SPU: return value to be passed in reg 3-74 def RetCC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3]>>, - CCIfType<[i16], CCAssignToReg<[R3]>>, - CCIfType<[i32], CCAssignToReg<[R3]>>, - CCIfType<[i64], CCAssignToReg<[R3]>>, - CCIfType<[i128], CCAssignToReg<[R3]>>, - CCIfType<[f32, f64], CCAssignToReg<[R3]>>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>, - CCIfType<[v2i32], CCAssignToReg<[R3]>> + CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>> ]>; @@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[ R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9b8c2ddd06359..2f1598441f5ae 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -41,13 +41,6 @@ using namespace llvm; namespace { //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates bool - isI64IntS10Immediate(ConstantSDNode *CN) - { - return isInt<10>(CN->getSExtValue()); - } - - //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates - bool isI32IntS10Immediate(ConstantSDNode *CN) { return isInt<10>(CN->getSExtValue()); @@ -67,14 +60,6 @@ namespace { return isInt<10>(CN->getSExtValue()); } - //! SDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntS10Immediate(SDNode *N) - { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); - return (CN != 0 && isI16IntS10Immediate(CN)); - } - //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values bool isI16IntU10Immediate(ConstantSDNode *CN) @@ -82,14 +67,6 @@ namespace { return isUInt<10>((short) CN->getZExtValue()); } - //! SDNode predicate for i16 sign-extended, 10-bit immediate values - bool - isI16IntU10Immediate(SDNode *N) - { - return (N->getOpcode() == ISD::Constant - && isI16IntU10Immediate(cast<ConstantSDNode>(N))); - } - //! 
ConstantSDNode predicate for signed 16-bit values /*! \arg CN The constant SelectionDAG node holding the value @@ -119,14 +96,6 @@ namespace { return false; } - //! SDNode predicate for signed 16-bit values. - bool - isIntS16Immediate(SDNode *N, short &Imm) - { - return (N->getOpcode() == ISD::Constant - && isIntS16Immediate(cast<ConstantSDNode>(N), Imm)); - } - //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. static bool isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) @@ -142,16 +111,6 @@ namespace { return false; } - bool - isHighLow(const SDValue &Op) - { - return (Op.getOpcode() == SPUISD::IndirectAddr - && ((Op.getOperand(0).getOpcode() == SPUISD::Hi - && Op.getOperand(1).getOpcode() == SPUISD::Lo) - || (Op.getOperand(0).getOpcode() == SPUISD::Lo - && Op.getOperand(1).getOpcode() == SPUISD::Hi))); - } - //===------------------------------------------------------------------===// //! EVT to "useful stuff" mapping structure: @@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, return true; } else if (Opc == ISD::Register ||Opc == ISD::CopyFromReg - ||Opc == ISD::UNDEF) { + ||Opc == ISD::UNDEF + ||Opc == ISD::Constant) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index ece19b9b89f6d..46f31899be0c4 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); - // "Odd size" vector classes that we're willing to support: - addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)i; @@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (alignment == 16) { ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer // and insertion byte: if (basePtr.getOpcode() == ISD::ADD @@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); } } else { // Unaligned load: must be more pessimistic about addressing modes: @@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DAG.getConstant(0, PtrVT)); } - // Re-emit as a v16i8 vector load - alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + // Load the memory to which to store. 
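That comment is the heart of the CellSPU store lowering: the SPU only has 16-byte quadword memory operations, so a scalar store becomes read-modify-write, and the quadword load follows below. A worked sketch of the byte-lane arithmetic involved (addresses invented; the DAG nodes above do the real work):

#include <stdio.h>

int main() {
  unsigned Addr = 0x1006;               // hypothetical 2-byte store target
  unsigned QuadBase = Addr & ~15u;      // containing quadword: 0x1000
  unsigned Lane = Addr & 15u;           // byte lane within it: 6
  // Lower as: load the quadword at QuadBase, SHUFB the new value into
  // bytes [Lane, Lane+1], then store the whole quadword back.
  printf("base=0x%x lane=%u\n", QuadBase, Lane);
  return 0;
}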
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), SN->isVolatile(), SN->isNonTemporal(), 16); @@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } #endif - SDValue insertEltOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); - SDValue vectorizeOp = - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); + SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, + insertEltOffs); + SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, + theValue); result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, vectorizeOp, alignLoadVec, @@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Ins.empty()) return Chain; + // Now handle the return value(s) + SmallVector<CCValAssign, 16> RVLocs; + CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); + + // If the call has results, copy the values out of the ret val registers. - switch (Ins[0].VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected ret value!"); - case MVT::Other: break; - case MVT::i32: - if (Ins.size() > 1 && Ins[1].VT == MVT::i32) { - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, - MVT::i32, InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, - Chain.getValue(2)).getValue(1); - InVals.push_back(Chain.getValue(0)); - } else { - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - } - break; - case MVT::i8: - case MVT::i16: - case MVT::i64: - case MVT::i128: - case MVT::f32: - case MVT::f64: - case MVT::v2f64: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; - } + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + InVals.push_back(Val); + } return Chain; } @@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } - case MVT::v2i32: { - SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); - } case MVT::v2i64: { return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); } @@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have - // to be monotonically increasing with one exception element. + // to be monotonically increasing with one exception element, and the source + // slot of the element to move must be the same as the destination. 
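Illustrative numbers for the byte-offset computation that follows; the element values are invented, the formula is the one in the patch (the old code hard-wired a 4-byte element size via << 2):

#include <stdio.h>

int main() {
  unsigned MaxElts = 8, EltSizeInBits = 16;   // a hypothetical v8i16 shuffle
  unsigned V2EltIdx0 = MaxElts;               // first mask index naming V2
  unsigned SrcElt = 13;                       // result slot 5 takes V2's elt 5
  unsigned V2EltOffset = (SrcElt - V2EltIdx0) * (EltSizeInBits / 8);
  printf("V2EltOffset = %u bytes\n", V2EltOffset);   // 10
  return 0;
}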
EVT VecVT = V1.getValueType(); EVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; - unsigned V2Elt = 0; + unsigned V2EltOffset = 0; unsigned V2EltIdx0 = 0; unsigned CurrElt = 0; unsigned MaxElts = VecVT.getVectorNumElements(); @@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (monotonic) { if (SrcElt >= V2EltIdx0) { - if (1 >= (++EltsFromV2)) { - V2Elt = (V2EltIdx0 - SrcElt) << 2; - } + // TODO: optimize for the monotonic case when several consecutive + // elements are taken from V2. Do we ever get such a case? + if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) + V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); + else + monotonic = false; + ++EltsFromV2; } else if (CurrElt != SrcElt) { monotonic = false; } @@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // R1 ($sp) is used here only as it is guaranteed to have last bits zero SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(V2Elt, MVT::i32)); + DAG.getConstant(V2EltOffset, MVT::i32)); SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); @@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { for (unsigned j = 0; j < BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &ResultMask[0], ResultMask.size()); return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); @@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector EVT VecVT = N.getValueType(); - if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { + if (!VecVT.isSimple() || !VecVT.isVector()) { report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } @@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue IdxOp = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); + EVT eltVT = ValOp.getValueType(); // use 0 when the lane to insert to is 'undef' - int64_t Idx=0; + int64_t Offset=0; if (IdxOp.getOpcode() != ISD::UNDEF) { ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - Idx = (CN->getSExtValue()); + Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; } EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(Idx, PtrVT)); - SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); + DAG.getConstant(Offset, PtrVT)); + // widen the mask when dealing with half vectors + EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), + 128/ VT.getVectorElementType().getSizeInBits()); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); SDValue result = DAG.getNode(SPUISD::SHUFB, dl, VT, diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 69aa0887bd77f..26d6b4f25ef12 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -54,148 +54,6 @@
SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } -bool -SPUInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, - unsigned& destReg, - unsigned& SrcSR, unsigned& DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - switch (MI.getOpcode()) { - default: - break; - case SPU::ORIv4i32: - case SPU::ORIr32: - case SPU::ORHIv8i16: - case SPU::ORHIr16: - case SPU::ORHIi8i16: - case SPU::ORBIv16i8: - case SPU::ORBIr8: - case SPU::ORIi16i32: - case SPU::ORIi8i32: - case SPU::AHIvec: - case SPU::AHIr16: - case SPU::AIv4i32: - assert(MI.getNumOperands() == 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isImm() && - "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!"); - if (MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - case SPU::AIr32: - assert(MI.getNumOperands() == 3 && - "wrong number of operands to AIr32"); - if (MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - (MI.getOperand(2).isImm() && - MI.getOperand(2).getImm() == 0)) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - case SPU::LRr8: - case SPU::LRr16: - case SPU::LRr32: - case SPU::LRf32: - case SPU::LRr64: - case SPU::LRf64: - case SPU::LRr128: - case SPU::LRv16i8: - case SPU::LRv8i16: - case SPU::LRv4i32: - case SPU::LRv4f32: - case SPU::LRv2i64: - case SPU::LRv2f64: - case SPU::ORv16i8_i8: - case SPU::ORv8i16_i16: - case SPU::ORv4i32_i32: - case SPU::ORv2i64_i64: - case SPU::ORv4f32_f32: - case SPU::ORv2f64_f64: - case SPU::ORi8_v16i8: - case SPU::ORi16_v8i16: - case SPU::ORi32_v4i32: - case SPU::ORi64_v2i64: - case SPU::ORf32_v4f32: - case SPU::ORf64_v2f64: -/* - case SPU::ORi128_r64: - case SPU::ORi128_f64: - case SPU::ORi128_r32: - case SPU::ORi128_f32: - case SPU::ORi128_r16: - case SPU::ORi128_r8: -*/ - case SPU::ORi128_vec: -/* - case SPU::ORr64_i128: - case SPU::ORf64_i128: - case SPU::ORr32_i128: - case SPU::ORf32_i128: - case SPU::ORr16_i128: - case SPU::ORr8_i128: -*/ - case SPU::ORvec_i128: -/* - case SPU::ORr16_r32: - case SPU::ORr8_r32: - case SPU::ORf32_r32: - case SPU::ORr32_f32: - case SPU::ORr32_r16: - case SPU::ORr32_r8: - case SPU::ORr16_r64: - case SPU::ORr8_r64: - case SPU::ORr64_r16: - case SPU::ORr64_r8: -*/ - case SPU::ORr64_r32: - case SPU::ORr32_r64: - case SPU::ORf32_r32: - case SPU::ORr32_f32: - case SPU::ORf64_r64: - case SPU::ORr64_f64: { - assert(MI.getNumOperands() == 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid SPU OR<type>_<vec> or LR instruction!"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - break; - } - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORr128: - case SPU::ORf32: - case SPU::ORf64: - assert(MI.getNumOperands() == 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid SPU OR(vec|r32|r64|gprc) instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - break; - } - - return false; -} - unsigned SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h 
b/lib/Target/CellSPU/SPUInstrInfo.h index fbb1733181486..191e55d0ca612 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -32,12 +32,6 @@ namespace llvm { /// virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index a7fb14c26a76c..ca0fe00e37f89 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadDFormVec<v4f32>; def v2f64: LoadDFormVec<v2f64>; - def v2i32: LoadDFormVec<v2i32>; - def r128: LoadDForm<GPRC>; def r64: LoadDForm<R64C>; def r32: LoadDForm<R32C>; @@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadAFormVec<v4f32>; def v2f64: LoadAFormVec<v2f64>; - def v2i32: LoadAFormVec<v2i32>; - def r128: LoadAForm<GPRC>; def r64: LoadAForm<R64C>; def r32: LoadAForm<R32C>; @@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in { def v4f32: LoadXFormVec<v4f32>; def v2f64: LoadXFormVec<v2f64>; - def v2i32: LoadXFormVec<v2i32>; - def r128: LoadXForm<GPRC>; def r64: LoadXForm<R64C>; def r32: LoadXForm<R32C>; @@ -180,8 +174,6 @@ multiclass StoreDForms def v4f32: StoreDFormVec<v4f32>; def v2f64: StoreDFormVec<v2f64>; - def v2i32: StoreDFormVec<v2i32>; - def r128: StoreDForm<GPRC>; def r64: StoreDForm<R64C>; def r32: StoreDForm<R32C>; @@ -212,8 +204,6 @@ multiclass StoreAForms def v4f32: StoreAFormVec<v4f32>; def v2f64: StoreAFormVec<v2f64>; - def v2i32: StoreAFormVec<v2i32>; - def r128: StoreAForm<GPRC>; def r64: StoreAForm<R64C>; def r32: StoreAForm<R32C>; @@ -246,8 +236,6 @@ multiclass StoreXForms def v4f32: StoreXFormVec<v4f32>; def v2f64: StoreXFormVec<v2f64>; - def v2i32: StoreXFormVec<v2i32>; - def r128: StoreXForm<GPRC>; def r64: StoreXForm<R64C>; def r32: StoreXForm<R32C>; @@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>: multiclass AddInstruction { def v4i32: AVecInst<v4i32>; def v16i8: AVecInst<v16i8>; - def r32: ARegInst<R32C>; } @@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), "sf\t$rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; + def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; @@ -1448,6 +1436,9 @@ class ORCvtGPRCVec: class ORCvtVecGPRC: ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; +class ORCvtVecVec: + ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>; + multiclass BitwiseOr { def v16i8: ORVecInst<v16i8>; @@ -3894,6 +3885,79 @@ multiclass SFPSub defm FS : SFPSub; +class FMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01100011010, OOL, IOL, + "fm\t$rT, $rA, $rB", SPrecFP, + pattern>; + +class FMVecInst<ValueType type>: + FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (type VECREG:$rT), + (fmul (type VECREG:$rA), (type VECREG:$rB)))]>; + +multiclass SFPMul +{ + def v4f32: FMVecInst<v4f32>; + def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; +} + +defm FM : 
SFPMul; + +// Floating point multiply and add +// e.g. d = c + (a * b) +def FMAv4f32: + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fadd (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; + +def FMAf32: + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +// FP multiply and subtract +// Subtracts value in rC from product +// res = a * b - c +def FMSv4f32 : + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), + (v4f32 VECREG:$rC)))]>; + +def FMSf32 : + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, + (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; + +// Floating Negative Multiply and Subtract +// Subtracts product from value in rC +// res = fneg(fms a b c) +// = - (a * b - c) +// = c - a * b +// NOTE: subtraction order +// fsub a b = a - b +// fs a b = b - a? +def FNMSf32 : + RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +def FNMSv4f32 : + RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB))))]>; + + + + // Floating point reciprocal estimate class FRESTInst<dag OOL, dag IOL>: @@ -4019,72 +4083,6 @@ def FSCRRf32 : // status and control register read //-------------------------------------- -// Floating point multiply instructions //-------------------------------------- - -def FMv4f32: - RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), - "fm\t$rT, $rA, $rB", SPrecFP, - [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB)))]>; - -def FMf32 : - RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), - "fm\t$rT, $rA, $rB", SPrecFP, - [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; - -// Floating point multiply and add -// e.g.
d = c + (a * b) -def FMAv4f32: - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fadd (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; - -def FMAf32: - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fma\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -// FP multiply and subtract -// Subtracts value in rC from product -// res = a * b - c -def FMSv4f32 : - RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), - (v4f32 VECREG:$rC)))]>; - -def FMSf32 : - RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, - (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; - -// Floating Negative Mulitply and Subtract -// Subtracts product from value in rC -// res = fneg(fms a b c) -// = - (a * b - c) -// = c - a * b -// NOTE: subtraction order -// fsub a b = a - b -// fs a b = b - a? -def FNMSf32 : - RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; - -def FNMSv4f32 : - RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), - "fnms\t$rT, $rA, $rB, $rC", SPrecFP, - [(set (v4f32 VECREG:$rT), - (fsub (v4f32 VECREG:$rC), - (fmul (v4f32 VECREG:$rA), - (v4f32 VECREG:$rB))))]>; - -//-------------------------------------- // Floating Point Conversions // Signed conversions: def CSiFv4f32: diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index 6216651e48a40..e1a0358abc46f 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{ return (N->getZExtValue() <= 0xff); }]>; -// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign -// extended field. Used by RI10Form instructions like 'ldq'. -def i64ImmSExt10 : PatLeaf<(imm), [{ - return isI64IntS10Immediate(N); -}]>; - // i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign // extended field. Used by RI10Form instructions like 'ldq'. def i32ImmSExt10 : PatLeaf<(imm), [{ diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index f7cfa42f2a95a..cf718917a5616 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, MBB.erase(I); } -unsigned +void SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const { unsigned i = 0; @@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } else { MO.ChangeToImmediate(Offset); } - return 0; } /// determineFrameLayout - Determine the size of the frame and maximum call @@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
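The FMA/FMS/FNMS patterns above encode three scalar identities: fma = c + a*b, fms = a*b - c, and fnms = c - a*b, i.e. the negation of fms. A tiny self-contained check of that arithmetic (illustration only; it does not model the SPU instructions or their rounding behavior):

#include <cassert>

int main() {
  float a = 2.0f, b = 3.0f, c = 10.0f;
  float fma  = c + a * b;  // FMA pattern: 16
  float fms  = a * b - c;  // FMS pattern: -4
  float fnms = c - a * b;  // FNMS pattern: 4
  assert(fma == 16.0f && fms == -4.0f && fnms == 4.0f);
  assert(fnms == -fms);    // matches "res = fneg(fms a b c)" in the comment
  return 0;
}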
FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); } // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) @@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const // Mark effective beginning of when frame pointer is ready. MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); MachineLocation FPDst(SPU::R1); MachineLocation FPSrc(MachineLocation::VirtualFP); @@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const dl = MBBI->getDebugLoc(); // Insert terminator label - BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)) + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) .addSym(MMI.getContext().CreateTempSymbol()); } } @@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const case SPU::LQDr32: return SPU::LQXr32; case SPU::LQDr128: return SPU::LQXr128; case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4i32: return SPU::LQXv4i32; case SPU::LQDv4f32: return SPU::LQXv4f32; case SPU::STQDr32: return SPU::STQXr32; case SPU::STQDr128: return SPU::STQXr128; diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 7a6ae6d43c7ed..aedb769cb4fc8 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -63,9 +63,8 @@ namespace llvm { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; //! Convert frame indices into machine operands - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + RegScavenger *RS = NULL) const; //!
Determine the frame's layout void determineFrameLayout(MachineFunction &MF) const; diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index bb88f2bf9a298..3e8f0979256af 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128, // The SPU's registers as vector registers: def VECREG : RegisterClass<"SPU", - [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64], + [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, [ /* volatile register */ diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 145568adcd4af..f08559f6e9f2e 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -104,7 +104,7 @@ namespace { public: static char ID; explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} + ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} virtual const char *getPassName() const { return "C++ backend"; } @@ -288,6 +288,8 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::LinkerPrivateWeakLinkage: Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: + Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: @@ -471,14 +473,22 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, HANDLE_ATTR(Nest); HANDLE_ATTR(ReadNone); HANDLE_ATTR(ReadOnly); - HANDLE_ATTR(InlineHint); HANDLE_ATTR(NoInline); HANDLE_ATTR(AlwaysInline); HANDLE_ATTR(OptimizeForSize); HANDLE_ATTR(StackProtect); HANDLE_ATTR(StackProtectReq); HANDLE_ATTR(NoCapture); + HANDLE_ATTR(NoRedZone); + HANDLE_ATTR(NoImplicitFloat); + HANDLE_ATTR(Naked); + HANDLE_ATTR(InlineHint); #undef HANDLE_ATTR + if (attrs & Attribute::StackAlignment) + Out << " | Attribute::constructStackAlignmentFromInt(" + << Attribute::getStackAlignmentFromAttrs(attrs) + << ")"; + attrs &= ~Attribute::StackAlignment; assert(attrs == 0 && "Unhandled attribute!"); Out << ";"; nl(Out); @@ -1404,7 +1414,8 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out); } Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), " + << opNames[call->getNumArgOperands()] << ", " + << iName << "_params.begin(), " << iName << "_params.end(), \""; } else if (call->getNumArgOperands() == 1) { Out << "CallInst* " << iName << " = CallInst::Create(" diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index b6e4d654f4aa7..f4b30ad271f16 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -65,11 +65,8 @@ namespace { void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); - void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); void printSavedRegsBitmask(raw_ostream &OS); - const char *emitCurrentABIString(); void emitFrameDirective(); void printInstruction(const MachineInstr *MI, raw_ostream &O); @@ -292,13 +289,6 @@ printMemOperand(const MachineInstr
*MI, int opNum, raw_ostream &O, printOperand(MI, opNum, O); } -void MBlazeAsmPrinter:: -printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier) { - const MachineOperand& MO = MI->getOperand(opNum); - O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm()); -} - // Force static initialization. extern "C" void LLVMInitializeMBlazeAsmPrinter() { RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index 482ddd3963fba..3815b6d0a398d 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -1,4 +1,4 @@ -//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===// +//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td index ddd49980e0a29..8622e0d74bcd5 100644 --- a/lib/Target/MBlaze/MBlazeCallingConv.td +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -1,4 +1,4 @@ -//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===// +//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index 42fea25073327..b551b79b291ed 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -32,7 +32,7 @@ namespace { static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { return "MBlaze Delay Slot Filler"; diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index c7cd5f4e44a98..e64dd0e3e2c3b 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -219,7 +219,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { // Operand is a result from an ADD. 
if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (Predicate_immSExt16(CN)) { + if (isUInt<16>(CN->getZExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td index a48a8c9723537..657b1d4940a70 100644 --- a/lib/Target/MBlaze/MBlazeInstrFPU.td +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===// +//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td index b59999e76ae58..51584111e6661 100644 --- a/lib/Target/MBlaze/MBlazeInstrFSL.td +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===// +//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index 7d655433d4e4d..28e8e4402225b 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===// +//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 6ff5825a26b6e..b590c090e095a 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -30,41 +30,6 @@ static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -bool MBlazeInstrInfo:: -isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - // add $dst, $src, $zero || addu $dst, $zero, $src - // or $dst, $src, $zero || or $dst, $zero, $src - if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) { - if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - return true; - } else if (MI.getOperand(2).isReg() && - MI.getOperand(2).getReg() == MBlaze::R0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - // addi $dst, $src, 0 - // ori $dst, $src, 0 - if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) { - if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index f0743705f010e..b3dba0ec768c0 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -173,12 +173,6 @@ public: /// virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 3c406dda0591b..e5d153474a7e5 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===// +//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td index 82552fa4b343a..a27cb5ba0dc4f 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsics.td +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -17,17 +17,11 @@ // MBlaze intrinsic classes. let TargetPrefix = "mblaze", isTarget = 1 in { - class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty], - [IntrWriteMem]>; + class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; - class MBFSL_Put_Intrinsic : Intrinsic<[], - [llvm_i32_ty, llvm_i32_ty], - [IntrWriteMem]>; + class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; - class MBFSL_PutT_Intrinsic : Intrinsic<[], - [llvm_i32_ty], - [IntrWriteMem]>; + class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 8cafa8c519c62..22b6a30470d17 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -242,9 +242,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // FrameIndex represents objects inside an abstract stack. // We must replace FrameIndex with a stack/frame pointer // direct reference.
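To make the comment above concrete: once frame layout is final, an abstract frame index plus any in-instruction offset collapses into a fixed displacement off the stack or frame pointer. A minimal sketch with invented types and names (the real eliminateFrameIndex rewrites the MachineOperand in place via ChangeToRegister/ChangeToImmediate, as the hunk below shows):

#include <cassert>

struct Frame {
  int ObjectOffset[4]; // byte offset of each frame object from the frame register
};

// Fold "frame-index FI + InstrOffset" into one frame-register displacement.
static int rewriteFrameIndex(const Frame &F, int FI, int InstrOffset) {
  return F.ObjectOffset[FI] + InstrOffset;
}

int main() {
  Frame F = {{0, 8, 16, 24}};
  assert(rewriteFrameIndex(F, 2, 4) == 20); // "FI#2 + 4" becomes "FP + 20"
  return 0;
}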
-unsigned MBlazeRegisterInfo:: +void MBlazeRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const { + RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); @@ -277,7 +277,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MI.getOperand(oi).ChangeToImmediate(Offset); MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); - return 0; } void MBlazeRegisterInfo:: diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index af97b0e2d79ec..1e1fde14ab7b8 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -63,9 +63,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { MachineBasicBlock::iterator I) const; /// Stack Frame Processing Methods - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index d0a1e7556c439..5e935103389e6 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -1,4 +1,4 @@ -//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===// +//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 1fec9e694005c..4a65542a447c6 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -1,4 +1,4 @@ -//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===// +//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt deleted file mode 100644 index b1d47ef05ec5e..0000000000000 --- a/lib/Target/MSIL/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_target(MSIL - MSILWriter.cpp - ) diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp deleted file mode 100644 index cc350e8a4f892..0000000000000 --- a/lib/Target/MSIL/MSILWriter.cpp +++ /dev/null @@ -1,1706 +0,0 @@ -//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This library converts LLVM code to MSIL code. 
-// -//===----------------------------------------------------------------------===// - -#include "MSILWriter.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/TypeSymbolTable.h" -#include "llvm/Analysis/ConstantsScanner.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/Passes.h" -using namespace llvm; - -namespace llvm { - // TargetMachine for the MSIL - struct MSILTarget : public TargetMachine { - MSILTarget(const Target &T, const std::string &TT, const std::string &FS) - : TargetMachine(T) {} - - virtual bool addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); - - virtual const TargetData *getTargetData() const { return 0; } - }; -} - -extern "C" void LLVMInitializeMSILTarget() { - // Register the target. - RegisterTargetMachine<MSILTarget> X(TheMSILTarget); -} - -bool MSILModule::runOnModule(Module &M) { - ModulePtr = &M; - TD = &getAnalysis<TargetData>(); - bool Changed = false; - // Find named types. - TypeSymbolTable& Table = M.getTypeSymbolTable(); - std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes(); - for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) { - if (!I->second->isStructTy() && !I->second->isOpaqueTy()) - Table.remove(I++); - else { - std::set<const Type *>::iterator T = Types.find(I->second); - if (T==Types.end()) - Table.remove(I++); - else { - Types.erase(T); - ++I; - } - } - } - // Find unnamed types. - unsigned RenameCounter = 0; - for (std::set<const Type *>::const_iterator I = Types.begin(), - E = Types.end(); I!=E; ++I) - if (const StructType *STy = dyn_cast<StructType>(*I)) { - while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy)) - ++RenameCounter; - Changed = true; - } - // Pointer for FunctionPass. - UsedTypes = &getAnalysis<FindUsedTypes>().getTypes(); - return Changed; -} - -char MSILModule::ID = 0; -char MSILWriter::ID = 0; - -bool MSILWriter::runOnFunction(Function &F) { - if (F.isDeclaration()) return false; - - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. 
- if (F.hasAvailableExternallyLinkage()) - return false; - - LInfo = &getAnalysis<LoopInfo>(); - printFunction(F); - return false; -} - - -bool MSILWriter::doInitialization(Module &M) { - ModulePtr = &M; - Out << ".assembly extern mscorlib {}\n"; - Out << ".assembly MSIL {}\n\n"; - Out << "// External\n"; - printExternals(); - Out << "// Declarations\n"; - printDeclarations(M.getTypeSymbolTable()); - Out << "// Definitions\n"; - printGlobalVariables(); - Out << "// Startup code\n"; - printModuleStartup(); - return false; -} - - -bool MSILWriter::doFinalization(Module &M) { - return false; -} - - -void MSILWriter::printModuleStartup() { - Out << - ".method static public int32 $MSIL_Startup() {\n" - "\t.entrypoint\n" - "\t.locals (native int i)\n" - "\t.locals (native int argc)\n" - "\t.locals (native int ptr)\n" - "\t.locals (void* argv)\n" - "\t.locals (string[] args)\n" - "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n" - "\tdup\n" - "\tstloc\targs\n" - "\tldlen\n" - "\tconv.i4\n" - "\tdup\n" - "\tstloc\targc\n"; - printPtrLoad(TD->getPointerSize()); - Out << - "\tmul\n" - "\tlocalloc\n" - "\tstloc\targv\n" - "\tldc.i4.0\n" - "\tstloc\ti\n" - "L_01:\n" - "\tldloc\ti\n" - "\tldloc\targc\n" - "\tceq\n" - "\tbrtrue\tL_02\n" - "\tldloc\targs\n" - "\tldloc\ti\n" - "\tldelem.ref\n" - "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::" - "StringToHGlobalAnsi(string)\n" - "\tstloc\tptr\n" - "\tldloc\targv\n" - "\tldloc\ti\n"; - printPtrLoad(TD->getPointerSize()); - Out << - "\tmul\n" - "\tadd\n" - "\tldloc\tptr\n" - "\tstind.i\n" - "\tldloc\ti\n" - "\tldc.i4.1\n" - "\tadd\n" - "\tstloc\ti\n" - "\tbr\tL_01\n" - "L_02:\n" - "\tcall void $MSIL_Init()\n"; - - // Call user 'main' function. - const Function* F = ModulePtr->getFunction("main"); - if (!F || F->isDeclaration()) { - Out << "\tldc.i4.0\n\tret\n}\n"; - return; - } - bool BadSig = true; - std::string Args(""); - Function::const_arg_iterator Arg1,Arg2; - - switch (F->arg_size()) { - case 0: - BadSig = false; - break; - case 1: - Arg1 = F->arg_begin(); - if (Arg1->getType()->isIntegerTy()) { - Out << "\tldloc\targc\n"; - Args = getTypeName(Arg1->getType()); - BadSig = false; - } - break; - case 2: - Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isIntegerTy() && - Arg2->getType()->getTypeID() == Type::PointerTyID) { - Out << "\tldloc\targc\n\tldloc\targv\n"; - Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); - BadSig = false; - } - break; - default: - BadSig = true; - } - - bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { - Out << "\tldc.i4.0\n"; - } else { - Out << "\tcall\t" << getTypeName(F->getReturnType()) << - getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n"; - if (RetVoid) - Out << "\tldc.i4.0\n"; - else - Out << "\tconv.i4\n"; - } - Out << "\tret\n}\n"; -} - -bool MSILWriter::isZeroValue(const Value* V) { - if (const Constant *C = dyn_cast<Constant>(V)) - return C->isNullValue(); - return false; -} - - -std::string MSILWriter::getValueName(const Value* V) { - std::string Name; - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - Name = GV->getName(); - else { - unsigned &No = AnonValueNumbers[V]; - if (No == 0) No = ++NextAnonValueNumber; - Name = "tmp" + utostr(No); - } - - // Name into the quotes allow control and space characters. 
- return "'"+Name+"'"; -} - - -std::string MSILWriter::getLabelName(const std::string& Name) { - if (Name.find('.')!=std::string::npos) { - std::string Tmp(Name); - // Replace unaccepable characters in the label name. - for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I) - if (*I=='.') *I = '@'; - return Tmp; - } - return Name; -} - - -std::string MSILWriter::getLabelName(const Value* V) { - std::string Name; - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - Name = GV->getName(); - else { - unsigned &No = AnonValueNumbers[V]; - if (No == 0) No = ++NextAnonValueNumber; - Name = "tmp" + utostr(No); - } - - return getLabelName(Name); -} - - -std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) { - switch (CallingConvID) { - case CallingConv::C: - case CallingConv::Cold: - case CallingConv::Fast: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) "; - case CallingConv::X86_FastCall: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) "; - case CallingConv::X86_StdCall: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; - case CallingConv::X86_ThisCall: - return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) "; - default: - errs() << "CallingConvID = " << CallingConvID << '\n'; - llvm_unreachable("Unsupported calling convention"); - } - return ""; // Not reached -} - - -std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) { - std::string Tmp = ""; - const Type* ElemTy = Ty; - assert(Ty->getTypeID()==TyID && "Invalid type passed"); - // Walk trought array element types. - for (;;) { - // Multidimensional array. - if (ElemTy->getTypeID()==TyID) { - if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy)) - Tmp += utostr(ATy->getNumElements()); - else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy)) - Tmp += utostr(VTy->getNumElements()); - ElemTy = cast<SequentialType>(ElemTy)->getElementType(); - } - // Base element type found. 
- if (ElemTy->getTypeID()!=TyID) break; - Tmp += ","; - } - return getTypeName(ElemTy, false, true)+"["+Tmp+"]"; -} - - -std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { - unsigned NumBits = 0; - switch (Ty->getTypeID()) { - case Type::VoidTyID: - return "void "; - case Type::IntegerTyID: - NumBits = getBitWidth(Ty); - if(NumBits==1) - return "bool "; - if (!isSigned) - return "unsigned int"+utostr(NumBits)+" "; - return "int"+utostr(NumBits)+" "; - case Type::FloatTyID: - return "float32 "; - case Type::DoubleTyID: - return "float64 "; - default: - errs() << "Type = " << *Ty << '\n'; - llvm_unreachable("Invalid primitive type"); - } - return ""; // Not reached -} - - -std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, - bool isNested) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return getPrimitiveTypeName(Ty,isSigned); - // FIXME: "OpaqueType" support - switch (Ty->getTypeID()) { - case Type::PointerTyID: - return "void* "; - case Type::StructTyID: - if (isNested) - return ModulePtr->getTypeName(Ty); - return "valuetype '"+ModulePtr->getTypeName(Ty)+"' "; - case Type::ArrayTyID: - if (isNested) - return getArrayTypeName(Ty->getTypeID(),Ty); - return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; - case Type::VectorTyID: - if (isNested) - return getArrayTypeName(Ty->getTypeID(),Ty); - return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; - default: - errs() << "Type = " << *Ty << '\n'; - llvm_unreachable("Invalid type in getTypeName()"); - } - return ""; // Not reached -} - - -MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) { - // Function argument - if (isa<Argument>(V)) - return ArgumentVT; - // Function - else if (const Function* F = dyn_cast<Function>(V)) - return F->hasLocalLinkage() ? InternalVT : GlobalVT; - // Variable - else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V)) - return G->hasLocalLinkage() ? InternalVT : GlobalVT; - // Constant - else if (isa<Constant>(V)) - return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT; - // Local variable - return LocalVT; -} - - -std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand, - bool isSigned) { - unsigned NumBits = 0; - switch (Ty->getTypeID()) { - // Integer constant, expanding for stack operations. - case Type::IntegerTyID: - NumBits = getBitWidth(Ty); - // Expand integer value to "int32" or "int64". - if (Expand) return (NumBits<=32 ? "i4" : "i8"); - if (NumBits==1) return "i1"; - return (isSigned ? "i" : "u")+utostr(NumBits/8); - // Float constant. - case Type::FloatTyID: - return "r4"; - case Type::DoubleTyID: - return "r8"; - case Type::PointerTyID: - return "i"+utostr(TD->getTypeAllocSize(Ty)); - default: - errs() << "TypeID = " << Ty->getTypeID() << '\n'; - llvm_unreachable("Invalid type in TypeToPostfix()"); - } - return ""; // Not reached -} - - -void MSILWriter::printConvToPtr() { - switch (ModulePtr->getPointerSize()) { - case Module::Pointer32: - printSimpleInstruction("conv.u4"); - break; - case Module::Pointer64: - printSimpleInstruction("conv.u8"); - break; - default: - llvm_unreachable("Module use not supporting pointer size"); - } -} - - -void MSILWriter::printPtrLoad(uint64_t N) { - switch (ModulePtr->getPointerSize()) { - case Module::Pointer32: - printSimpleInstruction("ldc.i4",utostr(N).c_str()); - // FIXME: Need overflow test? 
- if (!isUInt<32>(N)) { - errs() << "Value = " << utostr(N) << '\n'; - llvm_unreachable("32-bit pointer overflowed"); - } - break; - case Module::Pointer64: - printSimpleInstruction("ldc.i8",utostr(N).c_str()); - break; - default: - llvm_unreachable("Module use not supporting pointer size"); - } -} - - -void MSILWriter::printValuePtrLoad(const Value* V) { - printValueLoad(V); - printConvToPtr(); -} - - -void MSILWriter::printConstLoad(const Constant* C) { - if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) { - // Integer constant - Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t'; - if (CInt->isMinValue(true)) - Out << CInt->getSExtValue(); - else - Out << CInt->getZExtValue(); - } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) { - // Float constant - uint64_t X; - unsigned Size; - if (FP->getType()->getTypeID()==Type::FloatTyID) { - X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue(); - Size = 4; - } else { - X = FP->getValueAPF().bitcastToAPInt().getZExtValue(); - Size = 8; - } - Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')'; - } else if (isa<UndefValue>(C)) { - // Undefined constant value = NULL. - printPtrLoad(0); - } else { - errs() << "Constant = " << *C << '\n'; - llvm_unreachable("Invalid constant value"); - } - Out << '\n'; -} - - -void MSILWriter::printValueLoad(const Value* V) { - MSILWriter::ValueType Location = getValueLocation(V); - switch (Location) { - // Global variable or function address. - case GlobalVT: - case InternalVT: - if (const Function* F = dyn_cast<Function>(V)) { - std::string Name = getConvModopt(F->getCallingConv())+getValueName(F); - printSimpleInstruction("ldftn", - getCallSignature(F->getFunctionType(),NULL,Name).c_str()); - } else { - std::string Tmp; - const Type* ElemTy = cast<PointerType>(V->getType())->getElementType(); - if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) { - Tmp = "void* "+getValueName(V); - printSimpleInstruction("ldsfld",Tmp.c_str()); - } else { - Tmp = getTypeName(ElemTy)+getValueName(V); - printSimpleInstruction("ldsflda",Tmp.c_str()); - } - } - break; - // Function argument. - case ArgumentVT: - printSimpleInstruction("ldarg",getValueName(V).c_str()); - break; - // Local function variable. - case LocalVT: - printSimpleInstruction("ldloc",getValueName(V).c_str()); - break; - // Constant value. - case ConstVT: - if (isa<ConstantPointerNull>(V)) - printPtrLoad(0); - else - printConstLoad(cast<Constant>(V)); - break; - // Constant expression. 
- case ConstExprVT: - printConstantExpr(cast<ConstantExpr>(V)); - break; - default: - errs() << "Value = " << *V << '\n'; - llvm_unreachable("Invalid value location"); - } -} - - -void MSILWriter::printValueSave(const Value* V) { - switch (getValueLocation(V)) { - case ArgumentVT: - printSimpleInstruction("starg",getValueName(V).c_str()); - break; - case LocalVT: - printSimpleInstruction("stloc",getValueName(V).c_str()); - break; - default: - errs() << "Value = " << *V << '\n'; - llvm_unreachable("Invalid value location"); - } -} - - -void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left, - const Value* Right) { - printValueLoad(Left); - printValueLoad(Right); - Out << '\t' << Name << '\n'; -} - - -void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) { - if(Operand) - Out << '\t' << Inst << '\t' << Operand << '\n'; - else - Out << '\t' << Inst << '\n'; -} - - -void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) { - for (BasicBlock::const_iterator I = Dst->begin(); isa<PHINode>(I); ++I) { - const PHINode* Phi = cast<PHINode>(I); - const Value* Val = Phi->getIncomingValueForBlock(Src); - if (isa<UndefValue>(Val)) continue; - printValueLoad(Val); - printValueSave(Phi); - } -} - - -void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB, - const BasicBlock* TrueBB, - const BasicBlock* FalseBB) { - if (TrueBB==FalseBB) { - // "TrueBB" and "FalseBB" destination equals - printPHICopy(CurrBB,TrueBB); - printSimpleInstruction("pop"); - printSimpleInstruction("br",getLabelName(TrueBB).c_str()); - } else if (FalseBB==NULL) { - // If "FalseBB" not used the jump have condition - printPHICopy(CurrBB,TrueBB); - printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str()); - } else if (TrueBB==NULL) { - // If "TrueBB" not used the jump is unconditional - printPHICopy(CurrBB,FalseBB); - printSimpleInstruction("br",getLabelName(FalseBB).c_str()); - } else { - // Copy PHI instructions for each block - std::string TmpLabel; - // Print PHI instructions for "TrueBB" - if (isa<PHINode>(TrueBB->begin())) { - TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID()); - printSimpleInstruction("brtrue",TmpLabel.c_str()); - } else { - printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str()); - } - // Print PHI instructions for "FalseBB" - if (isa<PHINode>(FalseBB->begin())) { - printPHICopy(CurrBB,FalseBB); - printSimpleInstruction("br",getLabelName(FalseBB).c_str()); - } else { - printSimpleInstruction("br",getLabelName(FalseBB).c_str()); - } - if (isa<PHINode>(TrueBB->begin())) { - // Handle "TrueBB" PHI Copy - Out << TmpLabel << ":\n"; - printPHICopy(CurrBB,TrueBB); - printSimpleInstruction("br",getLabelName(TrueBB).c_str()); - } - } -} - - -void MSILWriter::printBranchInstruction(const BranchInst* Inst) { - if (Inst->isUnconditional()) { - printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0)); - } else { - printValueLoad(Inst->getCondition()); - printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0), - Inst->getSuccessor(1)); - } -} - - -void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue, - const Value* VFalse) { - std::string TmpLabel = std::string("select$true_")+utostr(getUniqID()); - printValueLoad(VTrue); - printValueLoad(Cond); - printSimpleInstruction("brtrue",TmpLabel.c_str()); - printSimpleInstruction("pop"); - printValueLoad(VFalse); - Out << TmpLabel << ":\n"; -} - - -void MSILWriter::printIndirectLoad(const Value* V) { - const Type* Ty = V->getType(); - printValueLoad(V); - if 
(const PointerType* P = dyn_cast<PointerType>(Ty)) - Ty = P->getElementType(); - std::string Tmp = "ldind."+getTypePostfix(Ty, false); - printSimpleInstruction(Tmp.c_str()); -} - - -void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) { - printValueLoad(Ptr); - printValueLoad(Val); - printIndirectSave(Val->getType()); -} - - -void MSILWriter::printIndirectSave(const Type* Ty) { - // Instruction need signed postfix for any type. - std::string postfix = getTypePostfix(Ty, false); - if (*postfix.begin()=='u') *postfix.begin() = 'i'; - postfix = "stind."+postfix; - printSimpleInstruction(postfix.c_str()); -} - - -void MSILWriter::printCastInstruction(unsigned int Op, const Value* V, - const Type* Ty, const Type* SrcTy) { - std::string Tmp(""); - printValueLoad(V); - switch (Op) { - // Signed - case Instruction::SExt: - // If sign extending int, convert first from unsigned to signed - // with the same bit size - because otherwise we will loose the sign. - if (SrcTy) { - Tmp = "conv."+getTypePostfix(SrcTy,false,true); - printSimpleInstruction(Tmp.c_str()); - } - // FALLTHROUGH - case Instruction::SIToFP: - case Instruction::FPToSI: - Tmp = "conv."+getTypePostfix(Ty,false,true); - printSimpleInstruction(Tmp.c_str()); - break; - // Unsigned - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::FPToUI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - Tmp = "conv."+getTypePostfix(Ty,false); - printSimpleInstruction(Tmp.c_str()); - break; - // Do nothing - case Instruction::BitCast: - // FIXME: meaning that ld*/st* instruction do not change data format. - break; - default: - errs() << "Opcode = " << Op << '\n'; - llvm_unreachable("Invalid conversion instruction"); - } -} - - -void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I, - gep_type_iterator E) { - unsigned Size; - // Load address - printValuePtrLoad(V); - // Calculate element offset. - for (; I!=E; ++I){ - Size = 0; - const Value* IndexValue = I.getOperand(); - if (const StructType* StrucTy = dyn_cast<StructType>(*I)) { - uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue(); - // Offset is the sum of all previous structure fields. - for (uint64_t F = 0; F<FieldIndex; ++F) - Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F)); - printPtrLoad(Size); - printSimpleInstruction("add"); - continue; - } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) { - Size = TD->getTypeAllocSize(SeqTy->getElementType()); - } else { - Size = TD->getTypeAllocSize(*I); - } - // Add offset of current element to stack top. - if (!isZeroValue(IndexValue)) { - // Constant optimization. - if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) { - if (C->getValue().isNegative()) { - printPtrLoad(C->getValue().abs().getZExtValue()*Size); - printSimpleInstruction("sub"); - continue; - } else - printPtrLoad(C->getZExtValue()*Size); - } else { - printPtrLoad(Size); - printValuePtrLoad(IndexValue); - printSimpleInstruction("mul"); - } - printSimpleInstruction("add"); - } - } -} - - -std::string MSILWriter::getCallSignature(const FunctionType* Ty, - const Instruction* Inst, - std::string Name) { - std::string Tmp(""); - if (Ty->isVarArg()) Tmp += "vararg "; - // Name and return type. - Tmp += getTypeName(Ty->getReturnType())+Name+"("; - // Function argument type list. 
- unsigned NumParams = Ty->getNumParams(); - for (unsigned I = 0; I!=NumParams; ++I) { - if (I!=0) Tmp += ","; - Tmp += getTypeName(Ty->getParamType(I)); - } - // CLR needs to know the exact amount of parameters received by vararg - // function, because caller cleans the stack. - if (Ty->isVarArg() && Inst) { - // Origin to function arguments in "CallInst" or "InvokeInst". - unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1; - // Print variable argument types. - unsigned NumOperands = Inst->getNumOperands()-Org; - if (NumParams<NumOperands) { - if (NumParams!=0) Tmp += ", "; - Tmp += "... , "; - for (unsigned J = NumParams; J!=NumOperands; ++J) { - if (J!=NumParams) Tmp += ", "; - Tmp += getTypeName(Inst->getOperand(J+Org)->getType()); - } - } - } - return Tmp+")"; -} - - -void MSILWriter::printFunctionCall(const Value* FnVal, - const Instruction* Inst) { - // Get function calling convention. - std::string Name = ""; - if (const CallInst* Call = dyn_cast<CallInst>(Inst)) - Name = getConvModopt(Call->getCallingConv()); - else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst)) - Name = getConvModopt(Invoke->getCallingConv()); - else { - errs() << "Instruction = " << Inst->getName() << '\n'; - llvm_unreachable("Need \"Invoke\" or \"Call\" instruction only"); - } - if (const Function* F = dyn_cast<Function>(FnVal)) { - // Direct call. - Name += getValueName(F); - printSimpleInstruction("call", - getCallSignature(F->getFunctionType(),Inst,Name).c_str()); - } else { - // Indirect function call. - const PointerType* PTy = cast<PointerType>(FnVal->getType()); - const FunctionType* FTy = cast<FunctionType>(PTy->getElementType()); - // Load function address. - printValueLoad(FnVal); - printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str()); - } -} - - -void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { - std::string Name; - switch (Inst->getIntrinsicID()) { - case Intrinsic::vastart: - Name = getValueName(Inst->getArgOperand(0)); - Name.insert(Name.length()-1,"$valist"); - // Obtain the argument handle. - printSimpleInstruction("ldloca",Name.c_str()); - printSimpleInstruction("arglist"); - printSimpleInstruction("call", - "instance void [mscorlib]System.ArgIterator::.ctor" - "(valuetype [mscorlib]System.RuntimeArgumentHandle)"); - // Save as pointer type "void*" - printValueLoad(Inst->getArgOperand(0)); - printSimpleInstruction("ldloca",Name.c_str()); - printIndirectSave(PointerType::getUnqual( - IntegerType::get(Inst->getContext(), 8))); - break; - case Intrinsic::vaend: - // Close argument list handle. - printIndirectLoad(Inst->getArgOperand(0)); - printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()"); - break; - case Intrinsic::vacopy: - // Copy "ArgIterator" valuetype. - printIndirectLoad(Inst->getArgOperand(0)); - printIndirectLoad(Inst->getArgOperand(1)); - printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator"); - break; - default: - errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n'; - llvm_unreachable("Invalid intrinsic function"); - } -} - - -void MSILWriter::printCallInstruction(const Instruction* Inst) { - if (isa<IntrinsicInst>(Inst)) { - // Handle intrinsic function. - printIntrinsicCall(cast<IntrinsicInst>(Inst)); - } else { - const CallInst *CI = cast<CallInst>(Inst); - // Load arguments to stack and call function. 
- for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I) - printValueLoad(CI->getArgOperand(I)); - printFunctionCall(CI->getCalledFunction(), Inst); - } -} - - -void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right) { - switch (Predicate) { - case ICmpInst::ICMP_EQ: - printBinaryInstruction("ceq",Left,Right); - break; - case ICmpInst::ICMP_NE: - // Emulate = not neg (Op1 eq Op2) - printBinaryInstruction("ceq",Left,Right); - printSimpleInstruction("neg"); - printSimpleInstruction("not"); - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - // Emulate = (Op1 eq Op2) or (Op1 lt Op2) - printBinaryInstruction("ceq",Left,Right); - if (Predicate==ICmpInst::ICMP_ULE) - printBinaryInstruction("clt.un",Left,Right); - else - printBinaryInstruction("clt",Left,Right); - printSimpleInstruction("or"); - break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - // Emulate = (Op1 eq Op2) or (Op1 gt Op2) - printBinaryInstruction("ceq",Left,Right); - if (Predicate==ICmpInst::ICMP_UGE) - printBinaryInstruction("cgt.un",Left,Right); - else - printBinaryInstruction("cgt",Left,Right); - printSimpleInstruction("or"); - break; - case ICmpInst::ICMP_ULT: - printBinaryInstruction("clt.un",Left,Right); - break; - case ICmpInst::ICMP_SLT: - printBinaryInstruction("clt",Left,Right); - break; - case ICmpInst::ICMP_UGT: - printBinaryInstruction("cgt.un",Left,Right); - break; - case ICmpInst::ICMP_SGT: - printBinaryInstruction("cgt",Left,Right); - break; - default: - errs() << "Predicate = " << Predicate << '\n'; - llvm_unreachable("Invalid icmp predicate"); - } -} - - -void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right) { - // FIXME: Correct comparison - std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)"; - switch (Predicate) { - case FCmpInst::FCMP_UGT: - // X > Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("cgt",Left,Right); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OGT: - // X > Y - printBinaryInstruction("cgt",Left,Right); - break; - case FCmpInst::FCMP_UGE: - // X >= Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("cgt",Left,Right); - printSimpleInstruction("or"); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OGE: - // X >= Y - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("cgt",Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_ULT: - // X < Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("clt",Left,Right); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OLT: - // X < Y - printBinaryInstruction("clt",Left,Right); - break; - case FCmpInst::FCMP_ULE: - // X <= Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("clt",Left,Right); - printSimpleInstruction("or"); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_OLE: - // X <= Y - printBinaryInstruction("ceq",Left,Right); - printBinaryInstruction("clt",Left,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_UEQ: - // X == Y || llvm_fcmp_uno(X, Y) - printBinaryInstruction("ceq",Left,Right); - printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right); - printSimpleInstruction("or"); - break; - case 
FCmpInst::FCMP_OEQ: - // X == Y - printBinaryInstruction("ceq",Left,Right); - break; - case FCmpInst::FCMP_UNE: - // X != Y - printBinaryInstruction("ceq",Left,Right); - printSimpleInstruction("neg"); - printSimpleInstruction("not"); - break; - case FCmpInst::FCMP_ONE: - // X != Y && llvm_fcmp_ord(X, Y) - printBinaryInstruction("ceq",Left,Right); - printSimpleInstruction("not"); - break; - case FCmpInst::FCMP_ORD: - // return X == X && Y == Y - printBinaryInstruction("ceq",Left,Left); - printBinaryInstruction("ceq",Right,Right); - printSimpleInstruction("or"); - break; - case FCmpInst::FCMP_UNO: - // X != X || Y != Y - printBinaryInstruction("ceq",Left,Left); - printSimpleInstruction("not"); - printBinaryInstruction("ceq",Right,Right); - printSimpleInstruction("not"); - printSimpleInstruction("or"); - break; - default: - llvm_unreachable("Illegal FCmp predicate"); - } -} - - -void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) { - std::string Label = "leave$normal_"+utostr(getUniqID()); - Out << ".try {\n"; - // Load arguments - for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I) - printValueLoad(Inst->getArgOperand(I)); - // Print call instruction - printFunctionCall(Inst->getOperand(0),Inst); - // Save function result and leave "try" block - printValueSave(Inst); - printSimpleInstruction("leave",Label.c_str()); - Out << "}\n"; - Out << "catch [mscorlib]System.Exception {\n"; - // Redirect to unwind block - printSimpleInstruction("pop"); - printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest()); - Out << "}\n" << Label << ":\n"; - // Redirect to continue block - printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest()); -} - - -void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) { - // FIXME: Emulate with IL "switch" instruction - // Emulate = if () else if () else if () else ... - for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) { - printValueLoad(Inst->getCondition()); - printValueLoad(Inst->getCaseValue(I)); - printSimpleInstruction("ceq"); - // Condition jump to successor block - printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL); - } - // Jump to default block - printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest()); -} - - -void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) { - printIndirectLoad(Inst->getOperand(0)); - printSimpleInstruction("call", - "instance typedref [mscorlib]System.ArgIterator::GetNextArg()"); - printSimpleInstruction("refanyval","void*"); - std::string Name = - "ldind."+getTypePostfix(PointerType::getUnqual( - IntegerType::get(Inst->getContext(), 8)),false); - printSimpleInstruction(Name.c_str()); -} - - -void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) { - uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType()); - // Constant optimization. - if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) { - printPtrLoad(CInt->getZExtValue()*Size); - } else { - printPtrLoad(Size); - printValueLoad(Inst->getOperand(0)); - printSimpleInstruction("mul"); - } - printSimpleInstruction("localloc"); -} - - -void MSILWriter::printInstruction(const Instruction* Inst) { - const Value *Left = 0, *Right = 0; - if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0); - if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1); - // Print instruction - // FIXME: "ShuffleVector","ExtractElement","InsertElement" support. 
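The printFCmpInstruction cases above lower LLVM's ordered/unordered float predicates onto the CLI's plain ceq/cgt/clt by OR-ing in an explicit unordered test, relying on NaN comparing unequal to itself. In scalar C++ terms (a sketch of the intended semantics, not the emitted IL):

    // fcmp "uno": true iff at least one operand is NaN.
    static bool fcmpUno(double X, double Y) {
      return X != X || Y != Y;
    }

    // fcmp "ult": less-than OR unordered, matching the clt/or sequence.
    static bool fcmpUlt(double X, double Y) {
      return X < Y || fcmpUno(X, Y);
    }

    // fcmp "ole": ordered and <=, matching the ceq/clt/or sequence.
    static bool fcmpOle(double X, double Y) {
      return X == Y || X < Y;
    }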
- switch (Inst->getOpcode()) { - // Terminator - case Instruction::Ret: - if (Inst->getNumOperands()) { - printValueLoad(Left); - printSimpleInstruction("ret"); - } else - printSimpleInstruction("ret"); - break; - case Instruction::Br: - printBranchInstruction(cast<BranchInst>(Inst)); - break; - // Binary - case Instruction::Add: - case Instruction::FAdd: - printBinaryInstruction("add",Left,Right); - break; - case Instruction::Sub: - case Instruction::FSub: - printBinaryInstruction("sub",Left,Right); - break; - case Instruction::Mul: - case Instruction::FMul: - printBinaryInstruction("mul",Left,Right); - break; - case Instruction::UDiv: - printBinaryInstruction("div.un",Left,Right); - break; - case Instruction::SDiv: - case Instruction::FDiv: - printBinaryInstruction("div",Left,Right); - break; - case Instruction::URem: - printBinaryInstruction("rem.un",Left,Right); - break; - case Instruction::SRem: - case Instruction::FRem: - printBinaryInstruction("rem",Left,Right); - break; - // Binary Condition - case Instruction::ICmp: - printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right); - break; - case Instruction::FCmp: - printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right); - break; - // Bitwise Binary - case Instruction::And: - printBinaryInstruction("and",Left,Right); - break; - case Instruction::Or: - printBinaryInstruction("or",Left,Right); - break; - case Instruction::Xor: - printBinaryInstruction("xor",Left,Right); - break; - case Instruction::Shl: - printValueLoad(Left); - printValueLoad(Right); - printSimpleInstruction("conv.i4"); - printSimpleInstruction("shl"); - break; - case Instruction::LShr: - printValueLoad(Left); - printValueLoad(Right); - printSimpleInstruction("conv.i4"); - printSimpleInstruction("shr.un"); - break; - case Instruction::AShr: - printValueLoad(Left); - printValueLoad(Right); - printSimpleInstruction("conv.i4"); - printSimpleInstruction("shr"); - break; - case Instruction::Select: - printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2)); - break; - case Instruction::Load: - printIndirectLoad(Inst->getOperand(0)); - break; - case Instruction::Store: - printIndirectSave(Inst->getOperand(1), Inst->getOperand(0)); - break; - case Instruction::SExt: - printCastInstruction(Inst->getOpcode(),Left, - cast<CastInst>(Inst)->getDestTy(), - cast<CastInst>(Inst)->getSrcTy()); - break; - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - printCastInstruction(Inst->getOpcode(),Left, - cast<CastInst>(Inst)->getDestTy()); - break; - case Instruction::GetElementPtr: - printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst), - gep_type_end(Inst)); - break; - case Instruction::Call: - printCallInstruction(cast<CallInst>(Inst)); - break; - case Instruction::Invoke: - printInvokeInstruction(cast<InvokeInst>(Inst)); - break; - case Instruction::Unwind: - printSimpleInstruction("newobj", - "instance void [mscorlib]System.Exception::.ctor()"); - printSimpleInstruction("throw"); - break; - case Instruction::Switch: - printSwitchInstruction(cast<SwitchInst>(Inst)); - break; - case Instruction::Alloca: - printAllocaInstruction(cast<AllocaInst>(Inst)); - break; - case Instruction::Unreachable: - printSimpleInstruction("ldstr", "\"Unreachable instruction\""); - 
printSimpleInstruction("newobj", - "instance void [mscorlib]System.Exception::.ctor(string)"); - printSimpleInstruction("throw"); - break; - case Instruction::VAArg: - printVAArgInstruction(cast<VAArgInst>(Inst)); - break; - default: - errs() << "Instruction = " << Inst->getName() << '\n'; - llvm_unreachable("Unsupported instruction"); - } -} - - -void MSILWriter::printLoop(const Loop* L) { - Out << getLabelName(L->getHeader()->getName()) << ":\n"; - const std::vector<BasicBlock*>& blocks = L->getBlocks(); - for (unsigned I = 0, E = blocks.size(); I!=E; I++) { - BasicBlock* BB = blocks[I]; - Loop* BBLoop = LInfo->getLoopFor(BB); - if (BBLoop == L) - printBasicBlock(BB); - else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L) - printLoop(BBLoop); - } - printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str()); -} - - -void MSILWriter::printBasicBlock(const BasicBlock* BB) { - Out << getLabelName(BB) << ":\n"; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { - const Instruction* Inst = I; - // Comment llvm original instruction - // Out << "\n//" << *Inst << "\n"; - // Do not handle PHI instruction in current block - if (Inst->getOpcode()==Instruction::PHI) continue; - // Print instruction - printInstruction(Inst); - // Save result - if (Inst->getType()!=Type::getVoidTy(BB->getContext())) { - // Do not save value after invoke, it done in "try" block - if (Inst->getOpcode()==Instruction::Invoke) continue; - printValueSave(Inst); - } - } -} - - -void MSILWriter::printLocalVariables(const Function& F) { - std::string Name; - const Type* Ty = NULL; - std::set<const Value*> Printed; - const Value* VaList = NULL; - unsigned StackDepth = 8; - // Find local variables - for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) { - if (I->getOpcode()==Instruction::Call || - I->getOpcode()==Instruction::Invoke) { - // Test stack depth. - if (StackDepth<I->getNumOperands()) - StackDepth = I->getNumOperands(); - } - const AllocaInst* AI = dyn_cast<AllocaInst>(&*I); - if (AI && !isa<GlobalVariable>(AI)) { - // Local variable allocation. - Ty = PointerType::getUnqual(AI->getAllocatedType()); - Name = getValueName(AI); - Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n"; - } else if (I->getType()!=Type::getVoidTy(F.getContext())) { - // Operation result. - Ty = I->getType(); - Name = getValueName(&*I); - Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n"; - } - // Test on 'va_list' variable - bool isVaList = false; - if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) { - // "va_list" as "va_arg" instruction operand. - isVaList = true; - VaList = VaInst->getOperand(0); - } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) { - // "va_list" as intrinsic function operand. - switch (Inst->getIntrinsicID()) { - case Intrinsic::vastart: - case Intrinsic::vaend: - case Intrinsic::vacopy: - isVaList = true; - VaList = Inst->getArgOperand(0); - break; - default: - isVaList = false; - } - } - // Print "va_list" variable. 
- if (isVaList && Printed.insert(VaList).second) { - Name = getValueName(VaList); - Name.insert(Name.length()-1,"$valist"); - Out << "\t.locals (valuetype [mscorlib]System.ArgIterator " - << Name << ")\n"; - } - } - printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str()); -} - - -void MSILWriter::printFunctionBody(const Function& F) { - // Print body - for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) { - if (Loop *L = LInfo->getLoopFor(I)) { - if (L->getHeader()==I && L->getParentLoop()==0) - printLoop(L); - } else { - printBasicBlock(I); - } - } -} - - -void MSILWriter::printConstantExpr(const ConstantExpr* CE) { - const Value *left = 0, *right = 0; - if (CE->getNumOperands()>=1) left = CE->getOperand(0); - if (CE->getNumOperands()>=2) right = CE->getOperand(1); - // Print instruction - switch (CE->getOpcode()) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - printCastInstruction(CE->getOpcode(),left,CE->getType()); - break; - case Instruction::GetElementPtr: - printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE)); - break; - case Instruction::ICmp: - printICmpInstruction(CE->getPredicate(),left,right); - break; - case Instruction::FCmp: - printFCmpInstruction(CE->getPredicate(),left,right); - break; - case Instruction::Select: - printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2)); - break; - case Instruction::Add: - case Instruction::FAdd: - printBinaryInstruction("add",left,right); - break; - case Instruction::Sub: - case Instruction::FSub: - printBinaryInstruction("sub",left,right); - break; - case Instruction::Mul: - case Instruction::FMul: - printBinaryInstruction("mul",left,right); - break; - case Instruction::UDiv: - printBinaryInstruction("div.un",left,right); - break; - case Instruction::SDiv: - case Instruction::FDiv: - printBinaryInstruction("div",left,right); - break; - case Instruction::URem: - printBinaryInstruction("rem.un",left,right); - break; - case Instruction::SRem: - case Instruction::FRem: - printBinaryInstruction("rem",left,right); - break; - case Instruction::And: - printBinaryInstruction("and",left,right); - break; - case Instruction::Or: - printBinaryInstruction("or",left,right); - break; - case Instruction::Xor: - printBinaryInstruction("xor",left,right); - break; - case Instruction::Shl: - printBinaryInstruction("shl",left,right); - break; - case Instruction::LShr: - printBinaryInstruction("shr.un",left,right); - break; - case Instruction::AShr: - printBinaryInstruction("shr",left,right); - break; - default: - errs() << "Expression = " << *CE << "\n"; - llvm_unreachable("Invalid constant expression"); - } -} - - -void MSILWriter::printStaticInitializerList() { - // List of global variables with uninitialized fields. - for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator - VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE; - ++VarI) { - const std::vector<StaticInitializer>& InitList = VarI->second; - if (InitList.empty()) continue; - // For each uninitialized field. 
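Worth noting from printLocalVariables above: the .maxstack value is only an estimate. It takes the largest operand count seen at any call or invoke, with a floor of 8, and doubles it as headroom for intermediate values. A sketch of that heuristic in isolation (names are illustrative):

    #include <algorithm>
    #include <vector>

    static unsigned estimateMaxStack(
        const std::vector<unsigned> &CallOperandCounts) {
      unsigned Depth = 8;  // default floor used by the writer
      for (size_t I = 0; I != CallOperandCounts.size(); ++I)
        Depth = std::max(Depth, CallOperandCounts[I]);
      return Depth * 2;    // safety margin for intermediate values
    }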
- for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(), - E = InitList.end(); I!=E; ++I) { - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) { - // Out << "\n// Init " << getValueName(VarI->first) << ", offset " << - // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n"; - // Load variable address - printValueLoad(VarI->first); - // Add offset - if (I->offset!=0) { - printPtrLoad(I->offset); - printSimpleInstruction("add"); - } - // Load value - printConstantExpr(CE); - // Save result at offset - std::string postfix = getTypePostfix(CE->getType(),true); - if (*postfix.begin()=='u') *postfix.begin() = 'i'; - postfix = "stind."+postfix; - printSimpleInstruction(postfix.c_str()); - } else { - errs() << "Constant = " << *I->constant << '\n'; - llvm_unreachable("Invalid static initializer"); - } - } - } -} - - -void MSILWriter::printFunction(const Function& F) { - bool isSigned = F.paramHasAttr(0, Attribute::SExt); - Out << "\n.method static "; - Out << (F.hasLocalLinkage() ? "private " : "public "); - if (F.isVarArg()) Out << "vararg "; - Out << getTypeName(F.getReturnType(),isSigned) << - getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n'; - // Arguments - Out << "\t("; - unsigned ArgIdx = 1; - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E; - ++I, ++ArgIdx) { - isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt); - if (I!=F.arg_begin()) Out << ", "; - Out << getTypeName(I->getType(),isSigned) << getValueName(I); - } - Out << ") cil managed\n"; - // Body - Out << "{\n"; - printLocalVariables(F); - printFunctionBody(F); - Out << "}\n"; -} - - -void MSILWriter::printDeclarations(const TypeSymbolTable& ST) { - std::string Name; - std::set<const Type*> Printed; - for (std::set<const Type*>::const_iterator - UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) { - const Type* Ty = *UI; - if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy()) - Name = getTypeName(Ty, false, true); - // Type with no need to declare. - else continue; - // Print not duplicated type - if (Printed.insert(Ty).second) { - Out << ".class value explicit ansi sealed '" << Name << "'"; - Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty); - Out << " }\n\n"; - } - } -} - - -unsigned int MSILWriter::getBitWidth(const Type* Ty) { - unsigned int N = Ty->getPrimitiveSizeInBits(); - assert(N!=0 && "Invalid type in getBitWidth()"); - switch (N) { - case 1: - case 8: - case 16: - case 32: - case 64: - return N; - default: - errs() << "Bits = " << N << '\n'; - llvm_unreachable("Unsupported integer width"); - } - return 0; // Not reached -} - - -void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) { - uint64_t TySize = 0; - const Type* Ty = C->getType(); - // Print zero initialized constant. 
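The printStaticConstant logic below emits float and double initializers as raw bit patterns (int32/int64 obtained via bitcastToAPInt) rather than as decimal text, which avoids any lossy round-trip through a textual float. A portable C++ equivalent of that bit-preserving step, for reference only:

    #include <cstdint>
    #include <cstring>

    static uint32_t floatBits(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits);  // reinterpret, don't convert
      return Bits;
    }

    static uint64_t doubleBits(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);
      return Bits;
    }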
- if (isa<ConstantAggregateZero>(C) || C->isNullValue()) { - TySize = TD->getTypeAllocSize(C->getType()); - Offset += TySize; - Out << "int8 (0) [" << TySize << "]"; - return; - } - // Print constant initializer - switch (Ty->getTypeID()) { - case Type::IntegerTyID: { - TySize = TD->getTypeAllocSize(Ty); - const ConstantInt* Int = cast<ConstantInt>(C); - Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")"; - break; - } - case Type::FloatTyID: - case Type::DoubleTyID: { - TySize = TD->getTypeAllocSize(Ty); - const ConstantFP* FP = cast<ConstantFP>(C); - if (Ty->getTypeID() == Type::FloatTyID) - Out << "int32 (" << - (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')'; - else - Out << "int64 (" << - FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')'; - break; - } - case Type::ArrayTyID: - case Type::VectorTyID: - case Type::StructTyID: - for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) { - if (I!=0) Out << ",\n"; - printStaticConstant(cast<Constant>(C->getOperand(I)), Offset); - } - break; - case Type::PointerTyID: - TySize = TD->getTypeAllocSize(C->getType()); - // Initialize with global variable address - if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) { - std::string name = getValueName(G); - Out << "&(" << name.insert(name.length()-1,"$data") << ")"; - } else { - // Dynamic initialization - if (!isa<ConstantPointerNull>(C) && !C->isNullValue()) - InitListPtr->push_back(StaticInitializer(C,Offset)); - // Null pointer initialization - if (TySize==4) Out << "int32 (0)"; - else if (TySize==8) Out << "int64 (0)"; - else llvm_unreachable("Invalid pointer size"); - } - break; - default: - errs() << "TypeID = " << Ty->getTypeID() << '\n'; - llvm_unreachable("Invalid type in printStaticConstant()"); - } - // Increase offset. - Offset += TySize; -} - - -void MSILWriter::printStaticInitializer(const Constant* C, - const std::string& Name) { - switch (C->getType()->getTypeID()) { - case Type::IntegerTyID: - case Type::FloatTyID: - case Type::DoubleTyID: - Out << getPrimitiveTypeName(C->getType(), false); - break; - case Type::ArrayTyID: - case Type::VectorTyID: - case Type::StructTyID: - case Type::PointerTyID: - Out << getTypeName(C->getType()); - break; - default: - errs() << "Type = " << *C << "\n"; - llvm_unreachable("Invalid constant type"); - } - // Print initializer - std::string label = Name; - label.insert(label.length()-1,"$data"); - Out << Name << " at " << label << '\n'; - Out << ".data " << label << " = {\n"; - uint64_t offset = 0; - printStaticConstant(C,offset); - Out << "\n}\n\n"; -} - - -void MSILWriter::printVariableDefinition(const GlobalVariable* G) { - const Constant* C = G->getInitializer(); - if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) - InitListPtr = 0; - else - InitListPtr = &StaticInitList[G]; - printStaticInitializer(C,getValueName(G)); -} - - -void MSILWriter::printGlobalVariables() { - if (ModulePtr->global_empty()) return; - Module::global_iterator I,E; - for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) { - // Variable definition - Out << ".field static " << (I->isDeclaration() ? 
"public " : - "private "); - if (I->isDeclaration()) { - Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n"; - } else - printVariableDefinition(&*I); - } -} - - -const char* MSILWriter::getLibraryName(const Function* F) { - return getLibraryForSymbol(F->getName(), true, F->getCallingConv()); -} - - -const char* MSILWriter::getLibraryName(const GlobalVariable* GV) { - return getLibraryForSymbol(GV->getName(), false, CallingConv::C); -} - - -const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction, - CallingConv::ID CallingConv) { - // TODO: Read *.def file with function and libraries definitions. - return "MSVCRT.DLL"; -} - - -void MSILWriter::printExternals() { - Module::const_iterator I,E; - // Functions. - for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) { - // Skip intrisics - if (I->isIntrinsic()) continue; - if (I->isDeclaration()) { - const Function* F = I; - std::string Name = getConvModopt(F->getCallingConv())+getValueName(F); - std::string Sig = - getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name); - Out << ".method static hidebysig pinvokeimpl(\"" - << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n"; - } - } - // External variables and static initialization. - Out << - ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)" - " native int LoadLibrary(string) preservesig {}\n" - ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)" - " native int GetProcAddress(native int, string) preservesig {}\n"; - Out << - ".method private static void* $MSIL_Import(string lib,string sym)\n" - " managed cil\n{\n" - "\tldarg\tlib\n" - "\tcall\tnative int LoadLibrary(string)\n" - "\tldarg\tsym\n" - "\tcall\tnative int GetProcAddress(native int,string)\n" - "\tdup\n" - "\tbrtrue\tL_01\n" - "\tldstr\t\"Can no import variable\"\n" - "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n" - "\tthrow\n" - "L_01:\n" - "\tret\n" - "}\n\n" - ".method static private void $MSIL_Init() managed cil\n{\n"; - printStaticInitializerList(); - // Foreach global variable. - for (Module::global_iterator I = ModulePtr->global_begin(), - E = ModulePtr->global_end(); I!=E; ++I) { - if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue; - // Use "LoadLibrary"/"GetProcAddress" to recive variable address. 
- std::string Tmp = getTypeName(I->getType())+getValueName(&*I); - printSimpleInstruction("ldsflda",Tmp.c_str()); - Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n"; - Out << "\tldstr\t\"" << I->getName() << "\"\n"; - printSimpleInstruction("call","void* $MSIL_Import(string,string)"); - printIndirectSave(I->getType()); - } - printSimpleInstruction("ret"); - Out << "}\n\n"; -} - - -//===----------------------------------------------------------------------===// -// External Interface declaration -//===----------------------------------------------------------------------===// - -bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) -{ - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - MSILWriter* Writer = new MSILWriter(o); - PM.add(createGCLoweringPass()); - // FIXME: Handle switch through native IL instruction "switch" - PM.add(createLowerSwitchPass()); - PM.add(createCFGSimplificationPass()); - PM.add(new MSILModule(Writer->UsedTypes,Writer->TD)); - PM.add(Writer); - PM.add(createGCInfoDeleter()); - return false; -} diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h deleted file mode 100644 index 92a3abe5c0a74..0000000000000 --- a/lib/Target/MSIL/MSILWriter.h +++ /dev/null @@ -1,258 +0,0 @@ -//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the MSILWriter that is used by the MSIL. -// -//===----------------------------------------------------------------------===// -#ifndef MSILWRITER_H -#define MSILWRITER_H - -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Pass.h" -#include "llvm/PassManager.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - extern Target TheMSILTarget; - - class MSILModule : public ModulePass { - Module *ModulePtr; - const std::set<const Type *>*& UsedTypes; - const TargetData*& TD; - - public: - static char ID; - MSILModule(const std::set<const Type *>*& _UsedTypes, - const TargetData*& _TD) - : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<FindUsedTypes>(); - AU.addRequired<TargetData>(); - } - - virtual const char *getPassName() const { - return "MSIL backend definitions"; - } - - virtual bool runOnModule(Module &M); - - }; - - class MSILWriter : public FunctionPass { - struct StaticInitializer { - const Constant* constant; - uint64_t offset; - - StaticInitializer() - : constant(0), offset(0) {} - - StaticInitializer(const Constant* _constant, uint64_t _offset) - : constant(_constant), offset(_offset) {} - }; - - uint64_t UniqID; - - uint64_t getUniqID() { - return ++UniqID; - } - - public: - formatted_raw_ostream &Out; - Module* ModulePtr; - const TargetData* TD; - LoopInfo *LInfo; - std::vector<StaticInitializer>* InitListPtr; - std::map<const 
GlobalVariable*,std::vector<StaticInitializer> > - StaticInitList; - const std::set<const Type *>* UsedTypes; - static char ID; - DenseMap<const Value*, unsigned> AnonValueNumbers; - unsigned NextAnonValueNumber; - - MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o), - NextAnonValueNumber(0) { - UniqID = 0; - } - - enum ValueType { - UndefVT, - GlobalVT, - InternalVT, - ArgumentVT, - LocalVT, - ConstVT, - ConstExprVT - }; - - bool isVariable(ValueType V) { - return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT; - } - - bool isConstValue(ValueType V) { - return V==ConstVT || V==ConstExprVT; - } - - virtual const char *getPassName() const { return "MSIL backend"; } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); - AU.setPreservesAll(); - } - - bool runOnFunction(Function &F); - - virtual bool doInitialization(Module &M); - - virtual bool doFinalization(Module &M); - - void printModuleStartup(); - - bool isZeroValue(const Value* V); - - std::string getValueName(const Value* V); - - std::string getLabelName(const Value* V); - - std::string getLabelName(const std::string& Name); - - std::string getConvModopt(CallingConv::ID CallingConvID); - - std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty); - - std::string getPrimitiveTypeName(const Type* Ty, bool isSigned); - - std::string getFunctionTypeName(const Type* Ty); - - std::string getPointerTypeName(const Type* Ty); - - std::string getTypeName(const Type* Ty, bool isSigned = false, - bool isNested = false); - - ValueType getValueLocation(const Value* V); - - std::string getTypePostfix(const Type* Ty, bool Expand, - bool isSigned = false); - - void printConvToPtr(); - - void printPtrLoad(uint64_t N); - - void printValuePtrLoad(const Value* V); - - void printConstLoad(const Constant* C); - - void printValueLoad(const Value* V); - - void printValueSave(const Value* V); - - void printBinaryInstruction(const char* Name, const Value* Left, - const Value* Right); - - void printSimpleInstruction(const char* Inst, const char* Operand = NULL); - - void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst); - - void printBranchToBlock(const BasicBlock* CurrBB, - const BasicBlock* TrueBB, - const BasicBlock* FalseBB); - - void printBranchInstruction(const BranchInst* Inst); - - void printSelectInstruction(const Value* Cond, const Value* VTrue, - const Value* VFalse); - - void printIndirectLoad(const Value* V); - - void printIndirectSave(const Value* Ptr, const Value* Val); - - void printIndirectSave(const Type* Ty); - - void printCastInstruction(unsigned int Op, const Value* V, - const Type* Ty, const Type* SrcTy=0); - - void printGepInstruction(const Value* V, gep_type_iterator I, - gep_type_iterator E); - - std::string getCallSignature(const FunctionType* Ty, - const Instruction* Inst, - std::string Name); - - void printFunctionCall(const Value* FnVal, const Instruction* Inst); - - void printIntrinsicCall(const IntrinsicInst* Inst); - - void printCallInstruction(const Instruction* Inst); - - void printICmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right); - - void printFCmpInstruction(unsigned Predicate, const Value* Left, - const Value* Right); - - void printInvokeInstruction(const InvokeInst* Inst); - - void printSwitchInstruction(const SwitchInst* Inst); - - void printVAArgInstruction(const VAArgInst* Inst); - - void printAllocaInstruction(const AllocaInst* Inst); - - void printInstruction(const Instruction* Inst); - - void printLoop(const Loop* L); 
- - void printBasicBlock(const BasicBlock* BB); - - void printLocalVariables(const Function& F); - - void printFunctionBody(const Function& F); - - void printConstantExpr(const ConstantExpr* CE); - - void printStaticInitializerList(); - - void printFunction(const Function& F); - - void printDeclarations(const TypeSymbolTable& ST); - - unsigned int getBitWidth(const Type* Ty); - - void printStaticConstant(const Constant* C, uint64_t& Offset); - - void printStaticInitializer(const Constant* C, const std::string& Name); - - void printVariableDefinition(const GlobalVariable* G); - - void printGlobalVariables(); - - const char* getLibraryName(const Function* F); - - const char* getLibraryName(const GlobalVariable* GV); - - const char* getLibraryForSymbol(StringRef Name, bool isFunction, - CallingConv::ID CallingConv); - - void printExternals(); - }; - -} - -#endif - diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT deleted file mode 100644 index d797c71fd39f0..0000000000000 --- a/lib/Target/MSIL/README.TXT +++ /dev/null @@ -1,26 +0,0 @@ -//===---------------------------------------------------------------------===// - -Vector instructions support. - -ShuffleVector -ExtractElement -InsertElement - -//===---------------------------------------------------------------------===// - -Add "OpaqueType" type. - -//===---------------------------------------------------------------------===// - -"switch" instruction emulation with CLI "switch" instruction. - -//===---------------------------------------------------------------------===// - -Write linker for external function, because function export need to know -dynamic library where function located. - -.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl) - void free(void*) preservesig {} - - - diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt deleted file mode 100644 index 9f0c3a09341a9..0000000000000 --- a/lib/Target/MSIL/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMMSILInfo - MSILTargetInfo.cpp - ) - diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp deleted file mode 100644 index dfd42814e51cc..0000000000000 --- a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MSILWriter.h" -#include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheMSILTarget; - -static unsigned MSIL_TripleMatchQuality(const std::string &TT) { - // This class always works, but shouldn't be the default in most cases. 
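The quality value in MSIL_TripleMatchQuality below is deliberate: in the target registry, every backend scores the triple and the highest non-zero score wins, so returning 1 keeps the MSIL backend selectable by name while any real architecture backend outranks it. A simplified sketch of that selection (this is not the actual TargetRegistry implementation; the types are illustrative):

    #include <string>
    #include <vector>

    struct TargetEntry {
      const char *Name;
      unsigned (*MatchQuality)(const std::string &Triple);
    };

    static const TargetEntry *pickTarget(
        const std::vector<TargetEntry> &Targets, const std::string &Triple) {
      const TargetEntry *Best = 0;
      unsigned BestQuality = 0;
      for (size_t I = 0; I != Targets.size(); ++I) {
        unsigned Q = Targets[I].MatchQuality(Triple);
        if (Q > BestQuality) { Best = &Targets[I]; BestQuality = Q; }
      }
      return Best;  // null when no target claims the triple
    }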
- return 1; -} - -extern "C" void LLVMInitializeMSILTargetInfo() { - TargetRegistry::RegisterTarget(TheMSILTarget, "msil", - "MSIL backend", - &MSIL_TripleMatchQuality); -} diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile deleted file mode 100644 index 30b0950db0f75..0000000000000 --- a/lib/Target/MSIL/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMMSILInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 68cb342b08f45..bd644435c76fa 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -10,7 +10,7 @@ // This file contains a pass that scans a machine function to determine which // conditional branches need more than 10 bits of displacement to reach their // target basic block. It does this in two passes; a calculation of basic block -// positions pass, and a branch psuedo op to machine branch opcode pass. This +// positions pass, and a branch pseudo op to machine branch opcode pass. This // pass should be run last, just before the assembly printer. // //===----------------------------------------------------------------------===// @@ -30,7 +30,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format"); namespace { struct MSP430BSel : public MachineFunctionPass { static char ID; - MSP430BSel() : MachineFunctionPass(&ID) {} + MSP430BSel() : MachineFunctionPass(ID) {} /// BlockSizes - The sizes of the basic blocks in the function. std::vector<unsigned> BlockSizes; @@ -52,7 +52,8 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() { } bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + const MSP430InstrInfo *TII = + static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index df28d07f5d717..bfab844f5b1a7 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -100,27 +100,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } bool -MSP430InstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers yet. 
- - switch (MI.getOpcode()) { - default: - return false; - case MSP430::MOV8rr: - case MSP430::MOV16rr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid register-register move instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -bool MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, @@ -361,7 +340,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Desc.getOpcode()) { default: assert(0 && "Unknown instruction size!"); - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index ebbda1aeef513..49ccc032bf29f 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -54,10 +54,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 608ca49fcf78e..3c3fa73477a59 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -101,7 +101,7 @@ bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const { MFI->isFrameAddressTaken()); } -bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } @@ -163,10 +163,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -204,7 +203,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); if (Offset == 0) - return 0; + return; // We need to materialize the offset via add instruction. 
unsigned DstReg = MI.getOperand(0).getReg(); @@ -215,12 +214,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg) .addReg(DstReg).addImm(Offset); - return 0; + return; } MI.getOperand(i).ChangeToRegister(BasePtr, false); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; } void diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 6e58d3116d273..4d2795bb40201 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -40,15 +40,14 @@ public: const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 2037a9114559e..49efe75d79d8f 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -180,7 +180,8 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() || + GV->hasLinkerPrivateWeakDefAutoLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 8ae05b75e919d..6660f6b624309 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -18,6 +18,8 @@ #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "MipsMachineFunction.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instructions.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -75,6 +77,7 @@ namespace { } virtual void EmitFunctionBodyStart(); virtual void EmitFunctionBodyEnd(); + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; static const char *getRegisterName(unsigned RegNo); virtual void EmitFunctionEntryLabel(); @@ -227,6 +230,23 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const { + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *MBB->pred_begin(); + + // If the predecessor is a switch statement, assume a jump table + // implementation, so it is not a fall through. 
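The override being added here refines the printer's fall-through analysis: a block whose lone predecessor was lowered from a 'switch' is assumed to be reached through a jump table, so its label must still be emitted even when it sits immediately after that predecessor. A simplified standalone form of the check (the real method defers to AsmPrinter::isBlockOnlyReachableByFallthrough instead of returning true directly):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    static bool reachableOnlyByFallthrough(const MachineBasicBlock *MBB) {
      if (MBB->pred_size() != 1)
        return false;                        // multiple ways in
      const MachineBasicBlock *Pred = *MBB->pred_begin();
      if (const BasicBlock *BB = Pred->getBasicBlock())
        if (isa<SwitchInst>(BB->getTerminator()))
          return false;                      // jump-table dispatch, not textual
      return true;
    }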
+ if (const BasicBlock *bb = Pred->getBasicBlock()) + if (isa<SwitchInst>(bb->getTerminator())) + return false; + + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + // Print out an operand for an inline asm expression. bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index aa036aef83d0b..a51c3779c7f4f 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -1,4 +1,4 @@ -//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===// +//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index c2bfb8fa738c6..8f313efaf8daa 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -1,4 +1,4 @@ -//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===// +//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index a2b615d8add2a..597ea0d6c2072 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -32,7 +32,7 @@ namespace { static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { return "Mips Delay Slot Filler"; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 3888bbf09ec7a..a47cf7b4f201e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -137,7 +137,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (Predicate_immSExt16(CN)) { + if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> @@ -184,8 +184,9 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { if (!Subtarget.isMips1() || NVT != MVT::f64) return NULL; - if (!Predicate_unindexedload(N) || - !Predicate_load(N)) + LoadSDNode *LN = cast<LoadSDNode>(N); + if (LN->getExtensionType() != ISD::NON_EXTLOAD || + LN->getAddressingMode() != ISD::UNINDEXED) return NULL; SDValue Chain = N->getOperand(0); @@ -248,8 +249,8 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { SDValue Chain = N->getOperand(0); - if (!Predicate_unindexedstore(N) || - !Predicate_store(N)) + StoreSDNode *SN = cast<StoreSDNode>(N); + if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED) return NULL; SDValue N1 = N->getOperand(1); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b6ff2c371d5c9..b0b99bad16071 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -317,13 +317,13 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(sinkMBB); // sinkMBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] // ... 
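The operand swap in the PHI above is a correctness fix, not a cleanup: a machine PHI is a list of (value, predecessor) pairs, and each incoming value must be tagged with the block it actually flows in from. A scalar analogy of the select diamond being built (illustrative only):

    // One value reaches the join from thisMBB (the conditional branch) and
    // the other from copy0MBB (the fall-through); the PHI result depends on
    // which predecessor ran.
    static int joinOfDiamond(bool TookBranch, int FromThisMBB,
                             int FromCopy0MBB) {
      return TookBranch ? FromThisMBB : FromCopy0MBB;
    }

Crossing the pairs, as the old code did, selected the wrong incoming value at runtime. The IsPIC inversion in LowerJumpTable just below is a similar logic fix: the Hi part is materialized on the non-PIC path, while PIC takes the global-pointer load.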
BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB); + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -542,7 +542,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); - if (IsPIC) { + if (!IsPIC) { SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index e948917eb80eb..cff79966dcd3b 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -1,4 +1,4 @@ -//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===// +//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 0853272f7280e..98ae2fa7da456 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===// +//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 6c09a3e10785b..aaf307b1ce3ff 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -30,53 +30,6 @@ static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -bool MipsInstrInfo:: -isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const -{ - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - // addu $dst, $src, $zero || addu $dst, $zero, $src - // or $dst, $src, $zero || or $dst, $zero, $src - if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) { - if (MI.getOperand(1).getReg() == Mips::ZERO) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - return true; - } else if (MI.getOperand(2).getReg() == Mips::ZERO) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - // mov $fpDst, $fpSrc - // mfc $gpDst, $fpSrc - // mtc $fpDst, $gpSrc - if (MI.getOpcode() == Mips::FMOV_S32 || - MI.getOpcode() == Mips::FMOV_D32 || - MI.getOpcode() == Mips::MFC1 || - MI.getOpcode() == Mips::MTC1 || - MI.getOpcode() == Mips::MOVCCRToCCR) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - - // addiu $dst, $src, 0 - if (MI.getOpcode() == Mips::ADDiu) { - if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } - - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index d6f87f9b0ce82..52a3d39840ba6 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -174,12 +174,6 @@ public: /// virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 5337c9fb816a2..320c5b8834831 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1,4 +1,4 @@ -//===- MipsInstrInfo.td - Mips Register defs --------------------*- C++ -*-===// +//===- MipsInstrInfo.td - Mips Register defs ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -96,12 +96,7 @@ def HI16 : SDNodeXForm<imm, [{ // Node immediate fits as 16-bit sign extended on target immediate. // e.g. addi, andi -def immSExt16 : PatLeaf<(imm), [{ - if (N->getValueType(0) == MVT::i32) - return (int32_t)N->getZExtValue() == (short)N->getZExtValue(); - else - return (int64_t)N->getZExtValue() == (short)N->getZExtValue(); -}]>; +def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; // Node immediate fits as 16-bit zero extended on target immediate. // The LO16 param means that only the lower 16 bits of the node diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index e15f0a58e501b..69436d2acb546 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -327,10 +327,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. 
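The hunk below is one instance of an interface change applied target by target throughout this commit: TargetRegisterInfo::eliminateFrameIndex loses its unused FrameIndexValue out-parameter and unsigned result, becoming a plain void method. The new override shape, as a skeleton (DemoRegisterInfo is a hypothetical backend, not part of the patch):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    namespace llvm { class RegScavenger; }
    using namespace llvm;

    struct DemoRegisterInfo {
      void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                               RegScavenger *RS = NULL) const {
        MachineInstr &MI = *II;
        // A real target rewrites MI's frame-index operand here into a
        // frame/stack register plus immediate offset; nothing is returned.
        (void)MI; (void)SPAdj; (void)RS;
      }
    };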
-unsigned MipsRegisterInfo:: +void MipsRegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const -{ + RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); @@ -361,7 +360,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MI.getOperand(i-1).ChangeToImmediate(Offset); MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); - return 0; } void MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index b500a650f7cc9..89282f8fa1465 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -51,9 +51,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { MachineBasicBlock::iterator I) const; /// Stack Frame Processing Methods - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index be78a2266268c..60efe31fbaf82 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===// +//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 616a79bf831cb..055ff32372184 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -1,4 +1,4 @@ -//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===// +//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index cd4afe8e2342a..2b6cb9e4e461d 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -10,7 +10,7 @@ tablegen(PIC16GenDAGISel.inc -gen-dag-isel) tablegen(PIC16GenCallingConv.inc -gen-callingconv) tablegen(PIC16GenSubtarget.inc -gen-subtarget) -add_llvm_target(PIC16 +add_llvm_target(PIC16CodeGen PIC16DebugInfo.cpp PIC16InstrInfo.cpp PIC16ISelDAGToDAG.cpp diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index cee55f4f260fe..08bb3e6f055b6 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -58,13 +58,10 @@ namespace PIC16CC { ESNames() {} public: ~ESNames() { - std::vector<char*>::iterator it = stk.end(); - it--; - while(stk.end() != stk.begin()) + while (!stk.empty()) { - char* p = *it; + char* p = stk.back(); delete [] p; - it--; stk.pop_back(); } } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 54a6a28992bf9..527b31d0cc9f3 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -312,6 +312,16 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) computeRegisterProperties(); } +std::pair<const TargetRegisterClass*, uint8_t> +PIC16TargetLowering::findRepresentativeClass(EVT VT) const { + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + case MVT::i16: + return 
std::make_pair(PIC16::FSR16RegisterClass, 1); + } +} + // getOutFlag - Extract the flag result if the Op has it. static SDValue getOutFlag(SDValue &Op) { // Flag is the last value of the node. diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index 0a7506cb497f5..d942af46a9e92 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -50,7 +50,7 @@ namespace llvm { CALL, // PIC16 Call instruction CALLW, // PIC16 CALLW instruction SUBCC, // Compare for equality or inequality. - SELECT_ICC, // Psuedo to be caught in schedular and expanded to brcond. + SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond. BRCOND, // Conditional branch. RET, // Return. Dummy @@ -181,6 +181,9 @@ namespace llvm { // FIXME: The function never seems to be aligned. return 1; } + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index e784f746f7f9b..81257f3c41083 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -167,21 +167,6 @@ void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DestReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - if (MI.getOpcode() == PIC16::copy_fsr - || MI.getOpcode() == PIC16::copy_w) { - DestReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - - return false; -} - /// InsertBranch - Insert a branch into the end of the specified /// MachineBasicBlock. This operands to this method are the same as those /// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index a3a77f11ba160..661b335d3b6c5 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -61,10 +61,6 @@ public: MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index 241170b11c2ad..b6aa38f765ea7 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -38,7 +38,7 @@ using namespace llvm; namespace { struct MemSelOpt : public MachineFunctionPass { static char ID; - MemSelOpt() : MachineFunctionPass(&ID) {} + MemSelOpt() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreservedID(MachineLoopInfoID); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index 27f1cf572ae6c..56f0211570928 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -256,7 +256,7 @@ PIC16Cloner::cloneFunction(Function *OrgF) { CloneAutos(OrgF); // Now create the clone. 
- ClonedF = CloneFunction(OrgF, VMap); + ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false); // The new function should be for interrupt line. Therefore should have // the name suffixed with IL and section attribute marked with IL. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h index e8b5aa45cdca9..e7d67ce09629a 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -35,7 +35,7 @@ namespace llvm { class PIC16Cloner : public ModulePass { public: static char ID; // Class identification - PIC16Cloner() : ModulePass(&ID) {} + PIC16Cloner() : ModulePass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<CallGraph>(); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp index 5ecb6aa551576..0f8928a4b5f50 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp @@ -171,8 +171,9 @@ void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) { for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) { for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E; ++I) { - if ((!isa<CallInst>(I) && !isa<InvokeInst>(I)) - || !CallSite(cast<Instruction>(I)).isCallee(I)) { + User *U = *I; + if ((!isa<CallInst>(U) && !isa<InvokeInst>(U)) + || !CallSite(cast<Instruction>(U)).isCallee(I)) { setColor(MI, ++IndirectCallColor); break; } diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h index 5a2551fabcda9..2f611e65de1fd 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h @@ -39,7 +39,7 @@ namespace llvm { unsigned IndirectCallColor; public: static char ID; // Class identification - PIC16Overlay() : ModulePass(&ID) { + PIC16Overlay() : ModulePass(ID) { OverlayStr = "Overlay="; InterruptDepth = PIC16OVERLAY::StartInterruptColor; IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor; diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index dff98d12c2ae0..76de47fdf0f4a 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -44,13 +44,10 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const { return false; } -unsigned PIC16RegisterInfo:: +void PIC16RegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - FrameIndexValue *Value, RegScavenger *RS) const -{ - /* NOT YET IMPLEMENTED */ - return 0; -} + RegScavenger *RS) const +{ /* NOT YET IMPLEMENTED */ } void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const { /* NOT YET IMPLEMENTED */ } diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 5536a617d2beb..20052b0034428 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -44,9 +44,8 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool hasFP(const MachineFunction &MF) const; - virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS=NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, RegScavenger *RS=NULL) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git 
a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index e35dc579f2cd5..c1a5663be9315 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -327,6 +328,19 @@ namespace { void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O, const char *Modifier); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const { + + MachineLocation Location; + assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; + } }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 52948c868b9c9..e161d23600e27 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -10,7 +10,7 @@ // This file contains a pass that scans a machine function to determine which // conditional branches need more than 16 bits of displacement to reach their // target basic block. It does this in two passes; a calculation of basic block -// positions pass, and a branch psuedo op to machine branch opcode pass. This +// positions pass, and a branch pseudo op to machine branch opcode pass. This // pass should be run last, just before the assembly printer. // //===----------------------------------------------------------------------===// @@ -31,7 +31,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format"); namespace { struct PPCBSel : public MachineFunctionPass { static char ID; - PPCBSel() : MachineFunctionPass(&ID) {} + PPCBSel() : MachineFunctionPass(ID) {} /// BlockSizes - The sizes of the basic blocks in the function. std::vector<unsigned> BlockSizes; @@ -53,7 +53,8 @@ FunctionPass *llvm::createPPCBranchSelectionPass() { } bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + const PPCInstrInfo *TII = + static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo()); // Give the blocks of the function a dense, in-order, numbering. 
Fn.RenumberBlocks(); BlockSizes.resize(Fn.getNumBlockIDs()); diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 155fba22d9d7b..441db94581aea 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -1,4 +1,4 @@ -//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===// +//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 361fa70fb4c45..df9ab52389ba8 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -45,7 +45,7 @@ namespace { public: PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), TM(tm), MCE(mce) {} + : MachineFunctionPass(ID), TM(tm), MCE(mce) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -110,7 +110,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); break; - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d47d989b34c00..14d1b154a5c9c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2467,18 +2467,31 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType()); - else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + bool needIndirectCall = true; + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { // If this is an absolute destination address, use the munged value. Callee = SDValue(Dest, 0); - else { + needIndirectCall = false; + } + // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 + // Use indirect calls for ALL functions calls in JIT mode, since the + // far-call stubs may be outside relocation limits for a BL instruction. + if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType()); + needIndirectCall = false; + } + } + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + needIndirectCall = false; + } + if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. 
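The PrepareCall rework above boils down to a three-way decision on the callee node. A condensed standalone sketch of that logic (plain C++; CalleeKind stands in for the isBLACompatibleAddress/GlobalAddressSDNode/ExternalSymbolSDNode checks and is illustrative only):

    // Mirrors the control flow of the hunk above, not the actual DAG code.
    enum CalleeKind { BLACompatible, GlobalAddr, ExternalSym, Other };

    static bool useIndirectCall(CalleeKind K, bool JITCodeModel) {
      switch (K) {
      case BLACompatible:
        return false;            // absolute destination: use the munged value
      case GlobalAddr:
        // PR5201 workaround: in JIT mode, far-call stubs may be out of BL
        // range, so global-address callees also go through MTCTR/BCTRL.
        return JITCodeModel;
      case ExternalSym:
        return false;            // still lowered to a TargetExternalSymbol
      default:
        return true;             // MTCTR/BCTRL indirect call
      }
    }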
SDValue MTCTROps[] = {Chain, Callee, InFlag}; @@ -3942,17 +3955,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { + if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { + if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { + if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1574aa3fb23a5..c17108fa92309 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -18,8 +18,11 @@ #include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -36,67 +39,6 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} -bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned& sourceReg, - unsigned& destReg, - unsigned& sourceSubIdx, - unsigned& destSubIdx) const { - sourceSubIdx = destSubIdx = 0; // No sub-registers. 
- - unsigned oc = MI.getOpcode(); - if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR || - oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isReg() && - "invalid PPC OR instruction!"); - if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - } else if (oc == PPC::ADDI) { // addi r1, r2, 0 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(2).isImm() && - "invalid PPC ADDI instruction!"); - if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - } else if (oc == PPC::ORI) { // ori r1, r2, 0 - assert(MI.getNumOperands() >= 3 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - MI.getOperand(2).isImm() && - "invalid PPC ORI instruction!"); - if (MI.getOperand(2).getImm() == 0) { - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2 - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid PPC FMR instruction"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } else if (oc == PPC::MCRF) { // mcrf cr1, cr2 - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid PPC MCRF instruction"); - sourceReg = MI.getOperand(1).getReg(); - destReg = MI.getOperand(0).getReg(); - return true; - } - return false; -} - unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { @@ -524,6 +466,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); + + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, /*Offset=*/0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + NewMIs.back()->addMemOperand(MF, MMO); } void @@ -637,6 +587,14 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); + + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, /*Offset=*/0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); + NewMIs.back()->addMemOperand(MF, MMO); } MachineInstr* @@ -667,7 +625,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { const char *AsmStr = MI->getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } - case PPC::DBG_LABEL: + case PPC::PROLOG_LABEL: case PPC::EH_LABEL: case PPC::GC_LABEL: case PPC::DBG_VALUE: diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index eadb21e217024..fc7b7b3cb8972 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -82,12 +82,6 @@ public: /// virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if 
the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 63b4581a37f9a..eb100ec75280a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1022,9 +1022,7 @@ let Uses = [RM] in { } } -/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending. -/// -/// Note that these are defined as pseudo-ops on the PPC970 because they are +/// Note that FMR is defined as a pseudo-op on the PPC970 because such moves are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). @@ -1032,10 +1030,6 @@ def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, []>, // (set F4RC:$frD, F4RC:$frB) PPC970_Unit_Pseudo; -def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fextend F4RC:$frB))]>, - PPC970_Unit_Pseudo; let PPC970_Unit = 3 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. @@ -1476,10 +1470,13 @@ def : Pat<(extloadi16 iaddr:$src), (LHZ iaddr:$src)>; def : Pat<(extloadi16 xaddr:$src), (LHZX xaddr:$src)>; -def : Pat<(extloadf32 iaddr:$src), - (FMRSD (LFS iaddr:$src))>; -def : Pat<(extloadf32 xaddr:$src), - (FMRSD (LFSX xaddr:$src))>; +def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>; +def : Pat<(f64 (extloadf32 xaddr:$src)), + (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; + +def : Pat<(f64 (fextend F4RC:$src)), + (COPY_TO_REGCLASS F4RC:$src, F8RC)>; // Memory barriers def : Pat<(membarrier (i32 imm /*ll*/), diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 4d6132a9ec50c..653e143ba407f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -449,8 +449,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Get stack alignments. unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); - assert(MaxAlign <= TargetAlign && - "Dynamic alloca with large aligns not supported"); + if (MaxAlign > TargetAlign) + report_fatal_error("Dynamic alloca with large aligns not supported"); // Determine the previous frame's address. If FrameSize can't be // represented as 16 bits or we need special alignment, then we load the @@ -580,10 +580,9 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, MBB.erase(II); } -unsigned +void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -622,14 +621,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { lowerDynamicAlloc(II, SPAdj, RS); - return 0; + return; } // Special case for pseudo-op SPILL_CR.
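The interface change running through this whole patch is visible here: eliminateFrameIndex now returns void and no longer threads a FrameIndexValue out-parameter, so early exits become a bare return. A sketch of the override shape shared by the Mips, PIC16, PPC, Sparc, and SystemZ headers in this patch (declaration only, copied in spirit from those headers):

    // 2.8-era hook signature after this patch; per-target overrides simply
    // replace "return 0;" with "return;".
    virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj,
                                     RegScavenger *RS = NULL) const;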
if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return 0; + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -674,7 +673,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); - return 0; + return; } // The offset doesn't fit into a single register, scavenge one to build the @@ -710,11 +709,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else { OperandBase = OffsetOperandNo; } - + unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); - return 0; } /// VRRegNo - Map from a numbered VR register to its enum value. @@ -1318,7 +1316,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); // Show update of SP. if (NegFrameSize) { @@ -1361,7 +1359,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { ReadyLabel = MMI.getContext().CreateTempSymbol(); // Mark effective beginning of when frame pointer is ready. - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel); + BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : (isPPC64 ? PPC::X1 : PPC::R1)); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index f026847a540b3..890b24b9c0a8b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -63,9 +63,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 40914ba62a70e..5d46065d96f22 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -69,6 +69,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, , HasFSQRT(false) , HasSTFIWX(false) , HasLazyResolverStubs(false) + , IsJITCodeModel(false) , DarwinVers(0) { // Determine default and user specified characteristics @@ -117,6 +118,9 @@ void PPCSubtarget::SetJITMode() { // everything is. This matters for PPC64, which codegens in PIC mode without // stubs. 
HasLazyResolverStubs = false; + + // Calls to external functions need to use indirect calls + IsJITCodeModel = true; } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 75fcf6238a27a..00ec7474c9e39 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -63,6 +63,7 @@ protected: bool HasFSQRT; bool HasSTFIWX; bool HasLazyResolverStubs; + bool IsJITCodeModel; /// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. @@ -124,6 +125,9 @@ public: bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; + // isJITCodeModel - True if we're generating code for the JIT + bool isJITCodeModel() const { return IsJITCodeModel; } + // Specific obvious features. bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4d7ee08de1dee..4faf8bcfd4199 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1919,5 +1919,21 @@ something like the following, which eliminates a branch: ret .LBB0_2: jmp foo # TAILCALL +//===---------------------------------------------------------------------===// +Given a branch where the two target blocks are identical ("ret i32 %b" in +both), simplifycfg will simplify them away. But not so for a switch statement: + +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + switch i32 %a, label %bb3 [ + i32 4, label %bb + i32 6, label %bb + ] +bb: ; preds = %entry, %entry + ret i32 %b + +bb3: ; preds = %entry + ret i32 %b +} //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 9e148ada8853e..aae5da8560056 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -32,7 +32,7 @@ namespace { static char ID; Filler(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { return "SPARC Delay Slot Filler"; diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp index 88b0927b35500..1423b1e64d66e 100644 --- a/lib/Target/Sparc/FPMover.cpp +++ b/lib/Target/Sparc/FPMover.cpp @@ -36,7 +36,7 @@ namespace { static char ID; explicit FPMover(TargetMachine &tm) - : MachineFunctionPass(&ID), TM(tm) { } + : MachineFunctionPass(ID), TM(tm) { } virtual const char *getPassName() const { return "Sparc Double-FP Move Fixer"; diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 925d782d988be..764336665d0bb 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -1,4 +1,4 @@ -//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===// +//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 698923e3c9e08..4ea94c4cb560f 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -84,7 +84,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr, if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (Predicate_simm13(CN)) { + if (isInt<13>(CN->getSExtValue())) { if 
(FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. @@ -120,9 +120,9 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr, return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { - if (isa<ConstantSDNode>(Addr.getOperand(1)) && - Predicate_simm13(Addr.getOperand(1).getNode())) - return false; // Let the reg+imm pattern catch this! + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) + if (isInt<13>(CN->getSExtValue())) + return false; // Let the reg+imm pattern catch this! if (Addr.getOperand(0).getOpcode() == SPISD::Lo || Addr.getOperand(1).getOpcode() == SPISD::Lo) return false; // Let the reg+imm pattern catch this! diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 3a4c80ad076a4..7ede8e7ebbe46 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -28,46 +28,6 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) RI(ST, *this), Subtarget(ST) { } -static bool isZeroImm(const MachineOperand &op) { - return op.isImm() && op.getImm() == 0; -} - -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -/// -bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSR, unsigned &DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - // We look for 3 kinds of patterns here: - // or with G0 or 0 - // add with G0 or 0 - // fmovs or FpMOVD (pseudo double move). - if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) { - if (MI.getOperand(1).getReg() == SP::G0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(2).getReg(); - return true; - } else if (MI.getOperand(2).getReg() == SP::G0) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) && - isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD || - MI.getOpcode() == SP::FMOVD) { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 133471857bad6..c00bd2198765c 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -43,12 +43,6 @@ public: /// virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. 
If diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index ddadd51a93a42..467ed48487adf 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -43,17 +43,9 @@ def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">; // Instruction Pattern Stuff //===----------------------------------------------------------------------===// -def simm11 : PatLeaf<(imm), [{ - // simm11 predicate - True if the imm fits in a 11-bit sign extended field. - return (((int)N->getZExtValue() << (32-11)) >> (32-11)) == - (int)N->getZExtValue(); -}]>; +def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>; -def simm13 : PatLeaf<(imm), [{ - // simm13 predicate - True if the imm fits in a 13-bit sign extended field. - return (((int)N->getZExtValue() << (32-13)) >> (32-13)) == - (int)N->getZExtValue(); -}]>; +def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; def LO10 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023, diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 427cc7fd45774..c85db20d2b748 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -69,10 +69,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -108,7 +107,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(SP::G1, false); MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); } - return 0; } void SparcRegisterInfo:: diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 9f0cda707b3ec..020ce567c9568 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,9 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index c03864fe41e4f..367bed3a85395 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -141,31 +141,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -bool -SystemZInstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - switch (MI.getOpcode()) { - default: - return false; - case SystemZ::MOV32rr: - case SystemZ::MOV64rr: - case SystemZ::MOV64rrP: - case SystemZ::MOV128rr: - case SystemZ::FMOV32rr: - case SystemZ::FMOV64rr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid register-register move instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - 
DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } -} - unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 0559619248a63..c248f2489c493 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -65,9 +65,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index ae96b0b08ff62..f8d3e6ac8a6fb 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -92,10 +92,9 @@ int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, return Offset; } -unsigned +void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -117,13 +116,13 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Offset is either a 12-bit unsigned or 20-bit signed integer. // FIXME: handle "too long" displacements. - int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); + int Offset = + getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); // Check whether displacement is too long to fit into 12 bit zext field. MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); MI.getOperand(i+1).ChangeToImmediate(Offset); - return 0; } void diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 670025f86e08f..5dae865cb79a0 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -34,7 +34,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const { return true; } + bool hasReservedCallFrame(const MachineFunction &MF) const { return true; } bool hasFP(const MachineFunction &MF) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; @@ -43,9 +43,8 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 5870d8a87004d..f35c96dadcee5 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -34,8 +34,7 @@ using namespace llvm; // Handle the Pass registration stuff necessary to use TargetData's. // Register the default SparcV9 implementation...
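The TargetData hunk below replaces the static RegisterPass object with the new INITIALIZE_PASS macro, matching the constructor change seen throughout this patch (passes now pass the ID char itself rather than &ID). A minimal sketch of the resulting boilerplate; MyPass and its strings are hypothetical, not from this patch:

    #include "llvm/Pass.h"   // assumed to pull in the INITIALIZE_PASS macro in this tree
    using namespace llvm;

    namespace {
      struct MyPass : public ImmutablePass {  // hypothetical example pass
        static char ID;
        MyPass() : ImmutablePass(ID) {}       // was: ImmutablePass(&ID)
      };
    }
    char MyPass::ID = 0;

    // Replaces: static RegisterPass<MyPass> X("mypass", "My pass", false, true);
    INITIALIZE_PASS(MyPass, "mypass", "My pass", false, true);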
-static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false, - true); +INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true); char TargetData::ID = 0; //===----------------------------------------------------------------------===// @@ -98,8 +97,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { //===----------------------------------------------------------------------===// TargetAlignElem -TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align, - unsigned char pref_align, uint32_t bit_width) { +TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align, + unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); TargetAlignElem retval; retval.AlignType = align_type; @@ -197,10 +196,10 @@ void TargetData::init(StringRef Desc) { } unsigned Size = getInt(Specifier.substr(1)); Split = Token.split(':'); - unsigned char ABIAlign = getInt(Split.first) / 8; + unsigned ABIAlign = getInt(Split.first) / 8; Split = Split.second.split(':'); - unsigned char PrefAlign = getInt(Split.first) / 8; + unsigned PrefAlign = getInt(Split.first) / 8; if (PrefAlign == 0) PrefAlign = ABIAlign; setAlignment(AlignType, ABIAlign, PrefAlign, Size); @@ -227,19 +226,19 @@ void TargetData::init(StringRef Desc) { /// /// @note This has to exist, because this is a pass, but it should never be /// used. -TargetData::TargetData() : ImmutablePass(&ID) { +TargetData::TargetData() : ImmutablePass(ID) { report_fatal_error("Bad TargetData ctor used. " "Tool did not specify a TargetData to use?"); } TargetData::TargetData(const Module *M) - : ImmutablePass(&ID) { + : ImmutablePass(ID) { init(M->getDataLayout()); } void -TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align, - unsigned char pref_align, uint32_t bit_width) { +TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, + unsigned pref_align, uint32_t bit_width) { assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { if (Alignments[i].AlignType == align_type && @@ -455,15 +454,6 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); - case Type::UnionTyID: { - const UnionType *UnTy = cast<UnionType>(Ty); - uint64_t Size = 0; - for (UnionType::element_iterator i = UnTy->element_begin(), - e = UnTy->element_end(); i != e; ++i) { - Size = std::max(Size, getTypeSizeInBits(*i)); - } - return Size; - } case Type::IntegerTyID: return cast<IntegerType>(Ty)->getBitWidth(); case Type::VoidTyID: @@ -496,7 +486,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref == false) for the requested type \a Ty. */ -unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { +unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { int AlignType = -1; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); @@ -518,18 +508,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { // Get the layout annotation... which is lazily created on demand. 
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty)); unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); - return std::max(Align, (unsigned)Layout->getAlignment()); - } - case Type::UnionTyID: { - const UnionType *UnTy = cast<UnionType>(Ty); - unsigned Align = 1; - - // Unions need the maximum alignment of all their entries - for (UnionType::element_iterator i = UnTy->element_begin(), - e = UnTy->element_end(); i != e; ++i) { - Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref)); - } - return Align; + return std::max(Align, Layout->getAlignment()); } case Type::IntegerTyID: case Type::VoidTyID: @@ -556,18 +535,18 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { abi_or_pref, Ty); } -unsigned char TargetData::getABITypeAlignment(const Type *Ty) const { +unsigned TargetData::getABITypeAlignment(const Type *Ty) const { return getAlignment(Ty, true); } /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for /// an integer type of the specified bitwidth. -unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { +unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const { return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); } -unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { +unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) return Alignments[i].ABIAlign; @@ -575,12 +554,12 @@ unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const { return getABITypeAlignment(Ty); } -unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const { +unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const { return getAlignment(Ty, false); } -unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { - unsigned Align = (unsigned) getPrefTypeAlignment(Ty); +unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { + unsigned Align = getPrefTypeAlignment(Ty); assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); return Log2_32(Align); } @@ -615,18 +594,13 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, // Update Ty to refer to current element Ty = STy->getElementType(FieldNo); - } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) { - unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue(); - - // Offset into union is canonically 0, but type changes - Ty = UnTy->getElementType(FieldNo); } else { // Update Ty to refer to current element Ty = cast<SequentialType>(Ty)->getElementType(); // Get the array index and the size of each array element. 
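The getIndexedOffset code that follows switches from a signed to an explicitly unsigned multiply. A standalone C++ illustration of why that matters (this is not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t arrayIdx = -1;    // negative GEP indices are legal
      uint64_t eltSize = 8;     // stands in for getTypeAllocSize(Ty)
      uint64_t result = 0;
      // Unsigned multiply: the conversion of arrayIdx wraps modulo 2^64, so
      // a negative index still subtracts from the running offset, and there
      // is no signed-overflow UB for huge index*size products.
      result += (uint64_t)arrayIdx * eltSize;
      assert(result == (uint64_t)-8);
      return 0;
    }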
if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue()) - Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); + Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty); } } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 47c91df1400e6..705b1c097e55f 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -30,7 +30,8 @@ namespace llvm { bool NoFramePointerElimNonLeaf; bool NoExcessFPPrecision; bool UnsafeFPMath; - bool FiniteOnlyFPMathOption; + bool NoInfsFPMath; + bool NoNaNsFPMath; bool HonorSignDependentRoundingFPMathOption; bool UseSoftFloat; FloatABI::ABIType FloatABIType; @@ -80,9 +81,14 @@ EnableUnsafeFPMath("enable-unsafe-fp-math", cl::location(UnsafeFPMath), cl::init(false)); static cl::opt<bool, true> -EnableFiniteOnlyFPMath("enable-finite-only-fp-math", - cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"), - cl::location(FiniteOnlyFPMathOption), +EnableNoInfsFPMath("enable-no-infs-fp-math", + cl::desc("Enable FP math optimizations that assume no +-Infs"), + cl::location(NoInfsFPMath), + cl::init(false)); +static cl::opt<bool, true> +EnableNoNaNsFPMath("enable-no-nans-fp-math", + cl::desc("Enable FP math optimizations that assume no NaNs"), + cl::location(NoNaNsFPMath), cl::init(false)); static cl::opt<bool, true> EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", @@ -290,12 +296,6 @@ namespace llvm { /// result is "less precise" than doing those operations individually. bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; } - /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math - /// option is specified on the command line. If this returns false (default), - /// the code generator is not allowed to assume that FP arithmetic arguments - /// and results are never NaNs or +-Infs. - bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; } - /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume /// that the rounding mode of the FPU can change from its default. bool HonorSignDependentRoundingFPMath() { diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 49bfad54136d3..55f222c7c1c95 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -63,7 +63,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { /// getAllocatableSetForRC - Toggle the bits that represent allocatable /// registers for the specific register class. 
static void getAllocatableSetForRC(const MachineFunction &MF, - const TargetRegisterClass *RC, BitVector &R){ + const TargetRegisterClass *RC, BitVector &R){ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), E = RC->allocation_order_end(MF); I != E; ++I) R.set(*I); @@ -74,12 +74,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, BitVector Allocatable(NumRegs); if (RC) { getAllocatableSetForRC(MF, RC, Allocatable); - return Allocatable; + } else { + for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), + E = regclass_end(); I != E; ++I) + getAllocatableSetForRC(MF, *I, Allocatable); } - for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), - E = regclass_end(); I != E; ++I) - getAllocatableSetForRC(MF, *I, Allocatable); + // Mask out the reserved registers + BitVector Reserved = getReservedRegs(MF); + Allocatable ^= Reserved & Allocatable; + return Allocatable; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index f1e66ab9d2c3f..f8588d818b75d 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -9,6 +9,8 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" +#include "X86Subtarget.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -19,6 +21,7 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmParser.h" using namespace llvm; @@ -28,6 +31,7 @@ struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { MCAsmParser &Parser; + TargetMachine &TM; protected: unsigned Is64Bit : 1; @@ -37,8 +41,6 @@ private: MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); @@ -48,13 +50,14 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); - void InstructionCleanup(MCInst &Inst); + bool MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCInst &Inst); - /// @name Auto-generated Match Functions + /// @name Auto-generated Matcher Functions /// { - bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); + unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; bool MatchInstructionImpl( const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); @@ -62,27 +65,32 @@ private: /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : TargetAsmParser(T), Parser(_Parser) {} + X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : TargetAsmParser(T), Parser(_Parser), TM(TM) { + + // Initialize the set of available features. 
+ setAvailableFeatures(ComputeAvailableFeatures( + &TM.getSubtarget<X86Subtarget>())); + } virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); }; - + class X86_32ATTAsmParser : public X86ATTAsmParser { public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : X86ATTAsmParser(T, _Parser) { + X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : X86ATTAsmParser(T, _Parser, TM) { Is64Bit = false; } }; class X86_64ATTAsmParser : public X86ATTAsmParser { public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser) - : X86ATTAsmParser(T, _Parser) { + X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) + : X86ATTAsmParser(T, _Parser, TM) { Is64Bit = true; } }; @@ -90,7 +98,7 @@ public: } // end anonymous namespace /// @name Auto-generated Match Functions -/// { +/// { static unsigned MatchRegisterName(StringRef Name); @@ -109,7 +117,7 @@ struct X86Operand : public MCParsedAsmOperand { } Kind; SMLoc StartLoc, EndLoc; - + union { struct { const char *Data; @@ -141,6 +149,8 @@ struct X86Operand : public MCParsedAsmOperand { /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + virtual void dump(raw_ostream &OS) const {} + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); @@ -185,7 +195,7 @@ struct X86Operand : public MCParsedAsmOperand { bool isToken() const {return Kind == Token; } bool isImm() const { return Kind == Immediate; } - + bool isImmSExti16i8() const { if (!isImm()) return false; @@ -260,10 +270,6 @@ struct X86Operand : public MCParsedAsmOperand { !getMemIndexReg() && getMemScale() == 1; } - bool isNoSegMem() const { - return Kind == Memory && !getMemSegReg(); - } - bool isReg() const { return Kind == Register; } void addExpr(MCInst &Inst, const MCExpr *Expr) const { @@ -298,14 +304,6 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); } - void addNoSegMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 4) && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); - Inst.addOperand(MCOperand::CreateImm(getMemScale())); - Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - addExpr(Inst, getMemDisp()); - } - static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { X86Operand *Res = new X86Operand(Token, Loc, Loc); Res->Tok.Data = Str.data(); @@ -376,13 +374,19 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); - + + // FIXME: This should be done using Requires<In32BitMode> and + // Requires<In64BitMode> so "eiz" usage in 64-bit instructions + // can be also checked. + if (RegNo == X86::RIZ && !Is64Bit) + return Error(Tok.getLoc(), "riz register in 64-bit mode only"); + // Parse %st(1) and "%st" as "%st(0)" if (RegNo == 0 && Tok.getString() == "st") { RegNo = X86::ST0; EndLoc = Tok.getLoc(); Parser.Lex(); // Eat 'st' - + // Check to see if we have '(4)' after %st. 
if (getLexer().isNot(AsmToken::LParen)) return false; @@ -403,15 +407,15 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, case 7: RegNo = X86::ST7; break; default: return Error(IntTok.getLoc(), "invalid stack index"); } - + if (getParser().Lex().isNot(AsmToken::RParen)) return Error(Parser.getTok().getLoc(), "expected ')'"); - + EndLoc = Tok.getLoc(); Parser.Lex(); // Eat ')' return false; } - + // If this is "db[0-7]", match it as an alias // for dr[0-7]. if (RegNo == 0 && Tok.getString().size() == 3 && @@ -426,14 +430,14 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, case '6': RegNo = X86::DR6; break; case '7': RegNo = X86::DR7; break; } - + if (RegNo != 0) { EndLoc = Tok.getLoc(); Parser.Lex(); // Eat it. return false; } } - + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -452,13 +456,17 @@ X86Operand *X86ATTAsmParser::ParseOperand() { unsigned RegNo; SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; - + if (RegNo == X86::EIZ || RegNo == X86::RIZ) { + Error(Start, "eiz and riz can only be used as index registers"); + return 0; + } + // If this is a segment register followed by a ':', then this is the start // of a memory reference, otherwise this is a normal register reference. if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); - - + + getParser().Lex(); // Eat the colon. return ParseMemOperand(RegNo, Start); } @@ -477,7 +485,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() { /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix /// has already been parsed if present. X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { - + // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is @@ -486,7 +494,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; if (getParser().ParseExpression(Disp, ExprEnd)) return 0; - + // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. if (getLexer().isNot(AsmToken::LParen)) { @@ -495,7 +503,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { return X86Operand::CreateMem(Disp, MemStart, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } - + // Eat the '('. Parser.Lex(); } else { // Okay, we have a '('. We don't know if this is an expression or not, // so we have to eat the ( to see beyond it. SMLoc LParenLoc = Parser.getTok().getLoc(); Parser.Lex(); // Eat the '('. - + if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { // Nothing to do here, fall into the code below with the '(' part of the // memory operand consumed. } else { SMLoc ExprEnd; - + // It must be a parenthesized expression, parse it now. if (getParser().ParseParenExpression(Disp, ExprEnd)) return 0; - + // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) { @@ -522,21 +530,25 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } - + // Eat the '('. Parser.Lex(); } } - + // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. unsigned BaseReg = 0, IndexReg = 0, Scale = 1; - + if (getLexer().is(AsmToken::Percent)) { SMLoc L; if (ParseRegister(BaseReg, L, L)) return 0; + if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { + Error(L, "eiz and riz can only be used as index registers"); + return 0; + } } - + if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -545,11 +557,11 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // correctly. // // Note that even though it would be completely consistent to support syntax - // like "1(%eax,,1)", the assembler doesn't. + // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. if (getLexer().is(AsmToken::Percent)) { SMLoc L; if (ParseRegister(IndexReg, L, L)) return 0; - if (getLexer().isNot(AsmToken::RParen)) { // Parse the scale amount: // ::= ',' [scale-expression] @@ -566,7 +578,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { int64_t ScaleVal; if (getParser().ParseAbsoluteExpression(ScaleVal)) return 0; - + // Validate the scale amount. if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); @@ -576,19 +588,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } } } else if (getLexer().isNot(AsmToken::RParen)) { - // Otherwise we have the unsupported form of a scale amount without an - // index. + // A scale amount without an index is ignored. SMLoc Loc = Parser.getTok().getLoc(); int64_t Value; if (getParser().ParseAbsoluteExpression(Value)) return 0; - - Error(Loc, "cannot have scale factor without index register"); - return 0; + + if (Value != 1) + Warning(Loc, "scale factor without index register is ignored"); + Scale = 1; } } - + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. if (getLexer().isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); @@ -596,7 +609,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { } SMLoc MemEnd = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'.
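The hunk above demotes a bare scale factor from a hard error to a warning. A condensed standalone sketch of the new behavior (plain C++; printf stands in for the parser's Warning/Error diagnostics):

    #include <cstdio>

    // Returns the scale to use, or 0 on a hard error.
    static unsigned parseScale(long value, bool haveIndexReg) {
      if (!haveIndexReg) {
        if (value != 1)
          std::printf("warning: scale factor without index register is ignored\n");
        return 1;                 // a scale is meaningless without an index
      }
      if (value != 1 && value != 2 && value != 4 && value != 8) {
        std::printf("error: scale factor in address must be 1, 2, 4 or 8\n");
        return 0;
      }
      return (unsigned)value;
    }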
- + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, MemStart, MemEnd); } @@ -743,6 +756,23 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } } + + // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq + if (PatchedName.startswith("vpclmul")) { + unsigned CLMULQuadWordSelect = StringSwitch<unsigned>( + PatchedName.slice(7, PatchedName.size() - 2)) + .Case("lqlq", 0x00) // src1[63:0], src2[63:0] + .Case("hqlq", 0x01) // src1[127:64], src2[63:0] + .Case("lqhq", 0x10) // src1[63:0], src2[127:64] + .Case("hqhq", 0x11) // src1[127:64], src2[127:64] + .Default(~0U); + if (CLMULQuadWordSelect != ~0U) { + ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect, + getParser().getContext()); + assert(PatchedName.endswith("dq") && "Unexpected mnemonic!"); + PatchedName = "vpclmulqdq"; + } + } Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) @@ -785,6 +815,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } + // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". + if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.back(); + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa<MCConstantExpr>(Op.Mem.Disp) && + cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } + // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as // "f{mul*,add*,sub*,div*} $op" if ((Name.startswith("fmul") || Name.startswith("fadd") || @@ -796,6 +840,16 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 2); } + // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B, + // B". + if (Name.startswith("imul") && Operands.size() == 3 && + static_cast<X86Operand*>(Operands[1])->isImm() && + static_cast<X86Operand*>(Operands.back())->isReg()) { + X86Operand *Op = static_cast<X86Operand*>(Operands.back()); + Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), + Op->getEndLoc())); + } + return false; } @@ -819,7 +873,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); @@ -831,82 +885,32 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } -/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a -/// imm operand, to having "rm" or "mr" operands with the offset in the disp -/// field. -static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, - bool isMR) { - MCOperand Disp = Inst.getOperand(0); - - // Start over with an empty instruction. - Inst = MCInst(); - Inst.setOpcode(Opc); - - if (!isMR) - Inst.addOperand(MCOperand::CreateReg(RegNo)); - - // Add the mem operand. 
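For the vpclmul hack in ParseInstruction above, the quadword-selector suffix maps directly to the PCLMULQDQ immediate: bit 0 selects the src1 quadword and bit 4 selects the src2 quadword. A self-contained sketch of that mapping, using plain std::string instead of LLVM's StringSwitch:

#include <cstdio>
#include <string>

// Returns ~0U when the mnemonic is not one of the recognized aliases.
static unsigned clmulImm(const std::string &M) {
  if (M.size() < 13 || M.compare(0, 7, "vpclmul") != 0 ||
      M.compare(M.size() - 2, 2, "dq") != 0)
    return ~0U;
  std::string Sel = M.substr(7, M.size() - 9);  // slice between "vpclmul" and "dq"
  if (Sel == "lqlq") return 0x00;  // src1[63:0],   src2[63:0]
  if (Sel == "hqlq") return 0x01;  // src1[127:64], src2[63:0]
  if (Sel == "lqhq") return 0x10;  // src1[63:0],   src2[127:64]
  if (Sel == "hqhq") return 0x11;  // src1[127:64], src2[127:64]
  return ~0U;
}

int main() {
  std::printf("0x%02x\n", clmulImm("vpclmulhqhqdq"));  // prints 0x11
}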
- Inst.addOperand(MCOperand::CreateReg(0)); // Segment - Inst.addOperand(MCOperand::CreateImm(1)); // Scale - Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg - Inst.addOperand(Disp); // Displacement - Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg - - if (isMR) - Inst.addOperand(MCOperand::CreateReg(RegNo)); -} - -// FIXME: Custom X86 cleanup function to implement a temporary hack to handle -// matching INCL/DECL correctly for x86_64. This needs to be replaced by a -// proper mechanism for supporting (ambiguous) feature dependent instructions. -void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { - if (!Is64Bit) return; - - switch (Inst.getOpcode()) { - case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break; - case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break; - case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break; - case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break; - case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break; - case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; - case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; - case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; - - // moffset instructions are x86-32 only. - case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; - case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; - case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; - case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; - case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; - case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; - } -} bool -X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> +X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst) { + assert(!Operands.empty() && "Unexpected empty operand list!"); + + X86Operand *Op = static_cast<X86Operand*>(Operands[0]); + assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + // First, try a direct match. if (!MatchInstructionImpl(Operands, Inst)) return false; - // Ignore anything which is obviously not a suffix match. - if (Operands.size() == 0) - return true; - X86Operand *Op = static_cast<X86Operand*>(Operands[0]); - if (!Op->isToken() || Op->getToken().size() > 15) - return true; - // FIXME: Ideally, we would only attempt suffix matches for things which are // valid prefixes, and we could just infer the right unambiguous // type. However, that requires substantially more matcher support than the // following hack. // Change the operand to point to a temporary token. - char Tmp[16]; StringRef Base = Op->getToken(); - memcpy(Tmp, Base.data(), Base.size()); - Op->setTokenValue(StringRef(Tmp, Base.size() + 1)); + SmallString<16> Tmp; + Tmp += Base; + Tmp += ' '; + Op->setTokenValue(Tmp.str()); // Check for the various suffix matches. Tmp[Base.size()] = 'b'; @@ -928,6 +932,38 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> return false; // Otherwise, the match failed. + + // If we had multiple suffix matches, then identify this as an ambiguous + // match.
+ if (MatchB + MatchW + MatchL + MatchQ != 4) { + char MatchChars[4]; + unsigned NumMatches = 0; + if (!MatchB) + MatchChars[NumMatches++] = 'b'; + if (!MatchW) + MatchChars[NumMatches++] = 'w'; + if (!MatchL) + MatchChars[NumMatches++] = 'l'; + if (!MatchQ) + MatchChars[NumMatches++] = 'q'; + + SmallString<126> Msg; + raw_svector_ostream OS(Msg); + OS << "ambiguous instructions require an explicit suffix (could be "; + for (unsigned i = 0; i != NumMatches; ++i) { + if (i != 0) + OS << ", "; + if (i + 1 == NumMatches) + OS << "or "; + OS << "'" << Base << MatchChars[i] << "'"; + } + OS << ")"; + Error(IDLoc, OS.str()); + } else { + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unrecognized instruction"); + } + return true; } diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index b70a587ec4e24..033973eeeff93 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -2,8 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMX86AsmPrinter X86ATTInstPrinter.cpp - X86AsmPrinter.cpp X86IntelInstPrinter.cpp - X86MCInstLower.cpp + X86InstComments.cpp ) add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index f2cdb5ba55eb0..554b96c96e0e5 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" +#include "X86InstComments.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -31,6 +32,10 @@ using namespace llvm; void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { printInstruction(MI, OS); + + // If verbose assembly is enabled, we can print some informative comments. + if (CommentStream) + EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3be4bae5bec22..eb986643014c7 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -56,6 +56,9 @@ public: void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } + void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp new file mode 100644 index 0000000000000..da9d5a3579e5f --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86InstComments.cpp @@ -0,0 +1,232 @@ +//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines functionality used to emit comments about X86 instructions to +// an output stream for -fverbose-asm. 
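The fallback logic above can be shown in miniature. First, a hedged sketch of the suffix retry itself (matchImpl is a stand-in for the generated MatchInstructionImpl, with the convention flipped so that true means success): the mnemonic gets one scratch character appended, and each size suffix is tried in turn.

#include <functional>
#include <iostream>
#include <string>

static bool matchWithSuffixes(std::string Mnemonic,
                              const std::function<bool(const std::string&)> &matchImpl) {
  if (matchImpl(Mnemonic))          // first, try a direct match
    return true;
  Mnemonic += ' ';                  // scratch slot for the suffix character
  for (char S : {'b', 'w', 'l', 'q'}) {
    Mnemonic.back() = S;
    if (matchImpl(Mnemonic))
      return true;
  }
  return false;
}

int main() {
  auto Impl = [](const std::string &M) { return M == "movl"; };
  std::cout << matchWithSuffixes("mov", Impl) << '\n';  // 1: matched as "movl"
}

Second, when several suffixes would match, the diagnostic above joins the surviving candidates with commas and an "or" before the last one. A standalone sketch of just that formatting, with std::ostringstream standing in for raw_svector_ostream:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

static std::string ambiguousMsg(const std::string &Base,
                                const std::vector<char> &Suffixes) {
  std::ostringstream OS;
  OS << "ambiguous instructions require an explicit suffix (could be ";
  for (size_t i = 0; i != Suffixes.size(); ++i) {
    if (i != 0) OS << ", ";
    if (i + 1 == Suffixes.size()) OS << "or ";
    OS << "'" << Base << Suffixes[i] << "'";
  }
  OS << ")";
  return OS.str();
}

int main() {
  // ambiguous instructions require an explicit suffix (could be 'cmpb',
  // 'cmpw', 'cmpl', or 'cmpq')
  std::cout << ambiguousMsg("cmp", {'b', 'w', 'l', 'q'}) << '\n';
}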
+// +//===----------------------------------------------------------------------===// + +#include "X86InstComments.h" +#include "X86GenInstrNames.inc" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include "../X86ShuffleDecode.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Top Level Entrypoint +//===----------------------------------------------------------------------===// + +/// EmitAnyX86InstComments - This function decodes x86 instructions and prints +/// newline terminated strings to the specified string if desired. This +/// information is shown in disassembly dumps when verbose assembly is enabled. +void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const char *(*getRegName)(unsigned)) { + // If this is a shuffle operation, the switch should fill in this state. + SmallVector<unsigned, 8> ShuffleMask; + const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; + + switch (MI->getOpcode()) { + case X86::INSERTPSrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); + break; + + case X86::MOVLHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodeMOVLHPSMask(2, ShuffleMask); + break; + + case X86::MOVHLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodeMOVHLPSMask(2, ShuffleMask); + break; + + case X86::PSHUFDri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFDmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + + case X86::PSHUFHWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFHWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::PSHUFLWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFLWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + + case X86::PUNPCKHBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHBWrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(16, ShuffleMask); + break; + case X86::PUNPCKHWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHWDrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(8, ShuffleMask); + break; + case X86::PUNPCKHDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(4, ShuffleMask); + break; + case X86::PUNPCKHQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHQDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(2, ShuffleMask); + break; + + case X86::PUNPCKLBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::PUNPCKLBWrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(16, ShuffleMask); + break; + case X86::PUNPCKLWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLWDrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(8, ShuffleMask); + break; + case X86::PUNPCKLDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(4, ShuffleMask); + break; + case X86::PUNPCKLQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLQDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(2, ShuffleMask); + break; + + case X86::SHUFPDrri: + DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; + + case X86::SHUFPSrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::SHUFPSrmi: + DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + + case X86::UNPCKLPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKLPDrm: + DecodeUNPCKLPMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKLPSrm: + DecodeUNPCKLPMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKHPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKHPDrm: + DecodeUNPCKHPMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKHPSrm: + DecodeUNPCKHPMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + } + + + // If this was a shuffle operation, print the shuffle mask. + if (!ShuffleMask.empty()) { + if (DestName == 0) DestName = Src1Name; + OS << (DestName ? DestName : "mem") << " = "; + + // If the two sources are the same, canonicalize the input elements to be + // from the first src so that we get larger element spans. + if (Src1Name == Src2Name) { + for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { + if ((int)ShuffleMask[i] >= 0 && // Not sentinel. + ShuffleMask[i] >= e) // From second mask. + ShuffleMask[i] -= e; + } + } + + // The shuffle mask specifies which elements of the src1/src2 fill in the + // destination, with a few sentinel values. Loop through and print them + // out. + for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { + if (i != 0) + OS << ','; + if (ShuffleMask[i] == SM_SentinelZero) { + OS << "zero"; + continue; + } + + // Otherwise, it must come from src1 or src2. Print the span of elements + // that comes from this src. + bool isSrc1 = ShuffleMask[i] < ShuffleMask.size(); + const char *SrcName = isSrc1 ? Src1Name : Src2Name; + OS << (SrcName ? SrcName : "mem") << '['; + bool IsFirst = true; + while (i != e && + (int)ShuffleMask[i] >= 0 && + (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) { + if (!IsFirst) + OS << ','; + else + IsFirst = false; + OS << ShuffleMask[i] % ShuffleMask.size(); + ++i; + } + OS << ']'; + --i; // For loop increments element #. 
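Two details of the comment printer above are worth pinning down with runnable sketches. The PSHUFD-family cases rely on the imm8 packing four 2-bit source-element selectors, least significant pair first; decodePSHUF below is a hypothetical stand-in for DecodePSHUFMask:

#include <cstdio>
#include <vector>

static std::vector<unsigned> decodePSHUF(unsigned NElts, unsigned Imm) {
  std::vector<unsigned> Mask;
  for (unsigned i = 0; i != NElts; ++i) {
    Mask.push_back(Imm % NElts);  // low bits pick the source of element i
    Imm /= NElts;
  }
  return Mask;
}

int main() {
  // "pshufd $0x1b, %xmm0, %xmm1" reverses the four dwords: 0x1b = 0b00011011.
  for (unsigned E : decodePSHUF(4, 0x1b))
    std::printf("%u ", E);        // prints "3 2 1 0"
  std::printf("\n");
}

And the span-merging loop just shown collapses consecutive elements drawn from the same source into one bracketed group. A self-contained re-creation of that loop over a hard-coded mask that takes the low half of each source (the register names are illustrative only):

#include <cstdio>
#include <vector>

int main() {
  const unsigned E = 4;                       // elements per source
  std::vector<unsigned> Mask = {0, 1, 4, 5};  // < E: from src1, >= E: from src2
  const char *Src[2] = {"xmm1", "xmm2"};

  std::printf("xmm0 = ");
  for (unsigned i = 0; i != Mask.size(); ++i) {
    if (i) std::printf(",");
    bool FromSrc1 = Mask[i] < E;
    std::printf("%s[", Src[FromSrc1 ? 0 : 1]);
    bool First = true;
    while (i != Mask.size() && (Mask[i] < E) == FromSrc1) {
      std::printf(First ? "%u" : ",%u", Mask[i] % E);
      First = false;
      ++i;
    }
    std::printf("]");
    --i;                                      // the for loop re-increments
  }
  std::printf("\n");  // prints "xmm0 = xmm1[0,1],xmm2[0,1]"
}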
+ } + //MI->print(OS, 0); + OS << "\n"; + } + +} diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h new file mode 100644 index 0000000000000..6b86db4f9e5c9 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86InstComments.h @@ -0,0 +1,25 @@ +//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines functionality used to emit comments about X86 instructions to +// an output stream for -fverbose-asm. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_INST_COMMENTS_H +#define X86_INST_COMMENTS_H + +namespace llvm { + class MCInst; + class raw_ostream; + void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const char *(*getRegName)(unsigned)); +} + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index a632047f6592b..5625b0ea618f8 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" +#include "X86InstComments.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -30,6 +31,10 @@ using namespace llvm; void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { printInstruction(MI, OS); + + // If verbose assembly is enabled, we can print some informative comments. + if (CommentStream) + EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 4d680744dd60b..6f120322742b2 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -64,6 +64,10 @@ public: O << "XMMWORD PTR "; printMemReference(MI, OpNo, O); } + void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); + } void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "DWORD PTR "; printMemReference(MI, OpNo, O); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 133482036ce1b..e9399f5c83224 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -18,23 +18,24 @@ tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources SSEDomainFix.cpp X86AsmBackend.cpp - X86CodeEmitter.cpp + X86AsmPrinter.cpp X86COFFMachineModuleInfo.cpp + X86CodeEmitter.cpp X86ELFWriterInfo.cpp + X86FastISel.cpp X86FloatingPoint.cpp - X86FloatingPointRegKill.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp X86JITInfo.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp + X86MCInstLower.cpp X86RegisterInfo.cpp + X86SelectionDAGInfo.cpp X86Subtarget.cpp X86TargetMachine.cpp X86TargetObjectFile.cpp - X86FastISel.cpp - X86SelectionDAGInfo.cpp ) if( CMAKE_CL_64 ) @@ -49,4 +50,3 @@ endif() add_llvm_target(X86CodeGen ${sources}) -target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt index 
be28e8b394a42..39efd2dbcf1ad 100644 --- a/lib/Target/X86/README-FPStack.txt +++ b/lib/Target/X86/README-FPStack.txt @@ -27,8 +27,8 @@ def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW, //===---------------------------------------------------------------------===// -The FP stackifier needs to be global. Also, it should handle simple permutates -to reduce number of shuffle instructions, e.g. turning: +The FP stackifier should handle simple permutates to reduce number of shuffle +instructions, e.g. turning: fld P -> fld Q fld Q fld P diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index b6aba93f37383..f96b22f1e2042 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -2,8 +2,46 @@ // Random ideas for the X86 backend: SSE-specific stuff. //===---------------------------------------------------------------------===// -- Consider eliminating the unaligned SSE load intrinsics, replacing them with - unaligned LLVM load instructions. +//===---------------------------------------------------------------------===// + +SSE Variable shift can be custom lowered to something like this, which uses a +small table + unaligned load + shuffle instead of going through memory. + +__m128i_shift_right: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 + .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + +... +__m128i shift_right(__m128i value, unsigned long offset) { + return _mm_shuffle_epi8(value, + _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset))); +} + +//===---------------------------------------------------------------------===// + +SSE has instructions for doing operations on complex numbers, we should pattern +match them. Compiling this: + +_Complex float f32(_Complex float A, _Complex float B) { + return A+B; +} + +into: + +_f32: + movdqa %xmm0, %xmm2 + addss %xmm1, %xmm2 + pshufd $16, %xmm2, %xmm2 + pshufd $1, %xmm1, %xmm1 + pshufd $1, %xmm0, %xmm0 + addss %xmm1, %xmm0 + pshufd $16, %xmm0, %xmm1 + movdqa %xmm2, %xmm0 + unpcklps %xmm1, %xmm0 + ret + +seems silly. + //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index efc0cd82f23e9..a305ae6ec5505 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1135,13 +1135,6 @@ void test(double *P) { //===---------------------------------------------------------------------===// -handling llvm.memory.barrier on pre SSE2 cpus - -should generate: -lock ; mov %esp, %esp - -//===---------------------------------------------------------------------===// - The generated code on x86 for checking for signed overflow on a multiply the obvious way is much longer than it needs to be. @@ -1870,3 +1863,100 @@ The code produced by gcc is 3 bytes shorter. This sort of construct often shows up with bitfields. 
//===---------------------------------------------------------------------===// + +Take the following C code: +int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } + +We generate the following IR with clang: +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + %tmp = xor i32 %b, %a ; <i32> [#uses=1] + %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1] + %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1] + %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv5 +} + +And the following x86 code: + xorl %esi, %edi + testb $-1, %dil + sete %al + movzbl %al, %eax + ret + +A cmpb instead of the xorl+testb would be one instruction shorter. + +//===---------------------------------------------------------------------===// + +Given the following C code: +int f(int a, int b) { return (signed char)a == (signed char)b; } + +We generate the following IR with clang: +define i32 @f(i32 %a, i32 %b) nounwind readnone { +entry: + %sext = shl i32 %a, 24 ; <i32> [#uses=1] + %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1] + %sext6 = shl i32 %b, 24 ; <i32> [#uses=1] + %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1] + %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1] + %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1] + ret i32 %conv5 +} + +And the following x86 code: + movsbl %sil, %eax + movsbl %dil, %ecx + cmpl %eax, %ecx + sete %al + movzbl %al, %eax + ret + + +It should be possible to eliminate the sign extensions. + +//===---------------------------------------------------------------------===// + +LLVM misses a load+store narrowing opportunity in this code: + +%struct.bf = type { i64, i16, i16, i32 } + +@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2] + +define void @t1() nounwind ssp { +entry: + %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] + %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1] + %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2] + %3 = load i32* %2, align 1 ; <i32> [#uses=1] + %4 = and i32 %3, -65537 ; <i32> [#uses=1] + store i32 %4, i32* %2, align 1 + %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1] + %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1] + %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2] + %8 = load i32* %7, align 1 ; <i32> [#uses=1] + %9 = and i32 %8, -131073 ; <i32> [#uses=1] + store i32 %9, i32* %7, align 1 + ret void +} + +LLVM currently emits this: + + movq bfi(%rip), %rax + andl $-65537, 8(%rax) + movq bfi(%rip), %rax + andl $-131073, 8(%rax) + ret + +It could narrow the loads and stores to emit this: + + movq bfi(%rip), %rax + andb $-2, 10(%rax) + movq bfi(%rip), %rax + andb $-3, 10(%rax) + ret + +The trouble is that there is a TokenFactor between the store and the +load, making it non-trivial to determine if there's anything between +the load and the store which would prohibit narrowing. 
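The narrowing claim above is easy to sanity-check: clearing bit 16 of the i32 field with andl $-65537 touches only byte 2 of the dword, which is why an andb $-2 at displacement +2 (the 10(%rax) versus 8(%rax) in the snippet) is equivalent. A small verification, assuming a little-endian target:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t v = 0xDEADBEEF;
  uint32_t a = v & 0xFFFEFFFFu;                 // andl $-65537: clear bit 16
  uint32_t b = v;
  reinterpret_cast<uint8_t*>(&b)[2] &= 0xFE;    // andb $-2 on byte 2 (LE)
  std::printf("%d\n", a == b);                  // prints 1
}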
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index dab070e1febda..13680c592e01b 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -115,7 +115,7 @@ class SSEDomainFixPass : public MachineFunctionPass { unsigned Distance; public: - SSEDomainFixPass() : MachineFunctionPass(&ID) {} + SSEDomainFixPass() : MachineFunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 677781d3730e2..27e88505150b4 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -49,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass(); /// crossings. FunctionPass *createSSEDomainFixPass(); -/// createX87FPRegKillInserterPass - This function returns a pass which -/// inserts FP_REG_KILL instructions where needed. -/// -FunctionPass *createX87FPRegKillInserterPass(); - /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object. FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a53f973c1c431..a19f1acffaca8 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -67,6 +67,8 @@ def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; +def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", + "Enable carry-less multiplication instructions">; def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", @@ -180,8 +182,6 @@ include "X86CallingConv.td" // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { string AsmParserClassName = "ATTAsmParser"; - string AsmParserInstCleanup = "InstructionCleanup"; - string MatchInstructionName = "MatchInstructionImpl"; int Variant = 0; // Discard comments in assembly strings. 
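The MachineFunctionPass(&ID) to MachineFunctionPass(ID) change in SSEDomainFix.cpp above is part of an LLVM-wide API update: pass identity is still the address of the pass's static char ID, but the base constructor now takes the char by reference and takes the address itself. A toy illustration of the idiom, outside of LLVM:

#include <iostream>

struct PassBase {
  const void *PassID;
  explicit PassBase(char &ID) : PassID(&ID) {}   // new style: pass ID by reference
};

struct MyPass : PassBase {
  static char ID;                 // the value never matters, only the address
  MyPass() : PassBase(ID) {}
};
char MyPass::ID = 0;

int main() {
  MyPass A, B;
  std::cout << (A.PassID == B.PassID) << '\n';   // 1: both share one identity
}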
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 2cf65c11f94aa..69dc967f9d88c 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -11,9 +11,11 @@ #include "X86.h" #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/ELFObjectWriter.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MachObjectWriter.h" @@ -190,10 +192,6 @@ public: HasScatteredSymbols = true; } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return 0; - } - bool isVirtualSection(const MCSection &Section) const { const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section); return SE.getType() == MCSectionELF::SHT_NOBITS;; @@ -204,12 +202,43 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: ELFX86_32AsmBackend(const Target &T) : ELFX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new ELFObjectWriter(OS, /*Is64Bit=*/false, + /*IsLittleEndian=*/true, + /*HasRelocationAddend=*/false); + } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: ELFX86_64AsmBackend(const Target &T) : ELFX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new ELFObjectWriter(OS, /*Is64Bit=*/true, + /*IsLittleEndian=*/true, + /*HasRelocationAddend=*/true); + } +}; + +class WindowsX86AsmBackend : public X86AsmBackend { + bool Is64Bit; +public: + WindowsX86AsmBackend(const Target &T, bool is64Bit) + : X86AsmBackend(T) + , Is64Bit(is64Bit) { + HasScatteredSymbols = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createWinCOFFObjectWriter(OS, Is64Bit); + } + + bool isVirtualSection(const MCSection &Section) const { + const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section); + return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + } }; class DarwinX86AsmBackend : public X86AsmBackend { @@ -290,6 +319,10 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, switch (Triple(TT).getOS()) { case Triple::Darwin: return new DarwinX86_32AsmBackend(T); + case Triple::MinGW32: + case Triple::Cygwin: + case Triple::Win32: + return new WindowsX86AsmBackend(T, false); default: return new ELFX86_32AsmBackend(T); } @@ -300,6 +333,10 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, switch (Triple(TT).getOS()) { case Triple::Darwin: return new DarwinX86_64AsmBackend(T); + case Triple::MinGW64: + case Triple::Cygwin: + case Triple::Win32: + return new WindowsX86AsmBackend(T, true); default: return new ELFX86_64AsmBackend(T); } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 08e6486d5b7a3..20110ad788cd1 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "X86ATTInstPrinter.h" -#include "X86IntelInstPrinter.h" +#include "AsmPrinter/X86ATTInstPrinter.h" +#include "AsmPrinter/X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" @@ -24,6 +24,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Type.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" #include 
"llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -35,6 +36,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" @@ -218,6 +220,10 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { default: llvm_unreachable("Unknown pcrel immediate operand"); + case MachineOperand::MO_Register: + // pc-relativeness was handled when computing the value in the reg. + printOperand(MI, OpNo, O); + return; case MachineOperand::MO_Immediate: O << MO.getImm(); return; @@ -655,6 +661,47 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } +MachineLocation +X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; +} + +void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &O) { + // Only the target-dependent form of DBG_VALUE should get here. + // Referencing the offset and metadata as NOps-2 and NOps-1 is + // probably portable to other targets; frame pointer location is not. + unsigned NOps = MI->getNumOperands(); + assert(NOps==7); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + if (V.getContext().isSubprogram()) + O << DISubprogram(V.getContext()).getDisplayName() << ":"; + O << V.getName(); + O << " <- "; + // Frame address. Currently handles register +- offset only. + O << '['; + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) + printOperand(MI, 0, O); + else + O << "undef"; + O << '+'; printOperand(MI, 3, O); + O << ']'; + O << "+"; + printOperand(MI, NOps-2, O); +} + + //===----------------------------------------------------------------------===// // Target Registry Stuff diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index b5a7f8dc321ad..e61be66c75a2e 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -14,9 +14,9 @@ #ifndef X86ASMPRINTER_H #define X86ASMPRINTER_H -#include "../X86.h" -#include "../X86MachineFunctionInfo.h" -#include "../X86TargetMachine.h" +#include "X86.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" #include "llvm/ADT/StringSet.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a6a1e4e573cff..e3409effc3187 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -33,13 +33,19 @@ def RetCC_X86Common : CallingConv<[ CCIfType<[i16], CCAssignToReg<[AX, DX]>>, CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>, CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>, - - // Vector types are returned in XMM0 and XMM1, when they fit. XMMM2 and XMM3 + + // Vector types are returned in XMM0 and XMM1, when they fit. 
XMM2 and XMM3 // can only be used by ABI non-compliant code. If the target doesn't have XMM // registers, it won't have vector types. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>, + // 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3 + // can only be used by ABI non-compliant code. This vector type is only + // supported while using the AVX target feature. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>, + // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, @@ -164,11 +170,16 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - + + // The first 8 256-bit vector arguments are passed in YMM registers. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>, + // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - + // Long doubles get stack slots whose size and alignment depends on the // subtarget. CCIfType<[f80], CCAssignToStack<0, 0>>, @@ -176,6 +187,10 @@ def CC_X86_64_C : CallingConv<[ // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + // 256-bit vectors get 32-byte stack slots that are 32-byte aligned. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToStack<32, 32>>, + // __m64 vectors get 8-byte stack slots that are 8-byte aligned. CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; @@ -271,9 +286,18 @@ def CC_X86_32_Common : CallingConv<[ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>, + // The first 4 AVX 256-bit vector arguments are passed in YMM registers. + CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>, + // Other SSE vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, + // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned. + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToStack<32, 32>>, + // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f13669bd741d1..824021c0c882b 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -53,12 +53,12 @@ namespace { public: static char ID; explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), II(0), TD(0), TM(tm), + : MachineFunctionPass(ID), II(0), TD(0), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(false), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} Emitter(X86TargetMachine &tm, CodeEmitter &mce, const X86InstrInfo &ii, const TargetData &td, bool is64) - : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm), + : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(is64), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -146,6 +146,103 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { return false; } +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned determineREX(const MachineInstr &MI) { + unsigned REX = 0; + const TargetInstrDesc &Desc = MI.getDesc(); + + // Pseudo instructions do not need REX prefix byte. + if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + unsigned NumOps = Desc.getNumOperands(); + if (NumOps) { + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= (1 << 0) | (1 << 2); + break; + case X86II::MRMSrcReg: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e))) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + i = isTwoAddr ? 
2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + + /// emitPCRelativeBlockAddress - This method keeps track of the information /// necessary to resolve the address of this block later and emits a dummy /// value. @@ -569,7 +666,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // Handle REX prefix. if (Is64BitMode) { - if (unsigned REX = X86InstrInfo::determineREX(MI)) + if (unsigned REX = determineREX(MI)) MCE.emitByte(0x40 | REX); } @@ -605,24 +702,29 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // base address. switch (Opcode) { default: - llvm_unreachable("psuedo instructions should be removed before code" + llvm_unreachable("pseudo instructions should be removed before code" " emission"); break; + // Do nothing for Int_MemBarrier - it's just a comment. Add a debug + // to make it slightly easier to see. + case X86::Int_MemBarrier: + DEBUG(dbgs() << "#MEMBARRIER\n"); + break; + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) report_fatal_error("JIT does not support inline asm!"); break; - case TargetOpcode::DBG_LABEL: + case TargetOpcode::PROLOG_LABEL: case TargetOpcode::GC_LABEL: case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; - + case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: - case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. @@ -674,7 +776,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, } assert(MO.isImm() && "Unknown RawFrm operand!"); - if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 || + Opcode == X86::WINCALL64pcrel32) { // Fix up immediate operand for pc relative calls. intptr_t Imm = (intptr_t)MO.getImm(); Imm = Imm - MCE.getCurrentPCValue() - 4; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ce1370763b77f..0c70eec4827fb 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -960,9 +960,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - // Fold the common case of a conditional branch with a comparison. + // Fold the common case of a conditional branch with a comparison + // in the same block (values defined on other blocks may not have + // initialized registers). if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse()) { + if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); // Try to take advantage of fallthrough opportunities. @@ -1058,10 +1060,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { const MachineInstr &MI = *RI; if (MI.definesRegister(Reg)) { - unsigned Src, Dst, SrcSR, DstSR; - - if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { - Reg = Src; + if (MI.isCopy()) { + Reg = MI.getOperand(1).getReg(); continue; } @@ -1648,15 +1648,26 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { MachineInstrBuilder MIB; if (CalleeOp) { // Register-indirect call. 
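The determineREX logic above packs four feature bits under a fixed 0100 high nibble, which is then emitted as 0x40 | REX: W (bit 3) selects 64-bit operand size, R (bit 2) extends ModRM.reg, X (bit 1) extends SIB.index, and B (bit 0) extends ModRM.rm, SIB.base, or the opcode register field. A standalone sketch of the layout:

#include <cstdio>

static unsigned rexPrefix(bool W, bool R, bool X, bool B) {
  return 0x40 | (W << 3) | (R << 2) | (X << 1) | unsigned(B);
}

int main() {
  // "movq %rax, %r8": REX.W for the 64-bit operation, REX.B because the
  // destination r8 is an extended register.
  std::printf("0x%02x\n", rexPrefix(true, false, false, true));  // prints 0x49
}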
- unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; + unsigned CallOpc; + if (Subtarget->isTargetWin64()) + CallOpc = X86::WINCALL64r; + else if (Subtarget->is64Bit()) + CallOpc = X86::CALL64r; + else + CallOpc = X86::CALL32r; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addReg(CalleeOp); } else { // Direct call. assert(GV && "Not a direct call"); - unsigned CallOpc = - Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; + unsigned CallOpc; + if (Subtarget->isTargetWin64()) + CallOpc = X86::WINCALL64pcrel32; + else if (Subtarget->is64Bit()) + CallOpc = X86::CALL64pcrel32; + else + CallOpc = X86::CALLpcrel32; // See if we need any target-specific flags on the GV operand. unsigned char OpFlags = 0; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index cee4ad70201a7..e6ebf669587dd 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -8,23 +8,18 @@ //===----------------------------------------------------------------------===// // // This file defines the pass which converts floating point instructions from -// virtual registers into register stack instructions. This pass uses live +// pseudo registers into register stack instructions. This pass uses live // variable information to indicate where the FPn registers are used and their // lifetimes. // -// This pass is hampered by the lack of decent CFG manipulation routines for -// machine code. In particular, this wants to be able to split critical edges -// as necessary, traverse the machine basic block CFG in depth-first order, and -// allow there to be multiple machine basic blocks for each LLVM basicblock -// (needed for critical edge splitting). +// The x87 hardware tracks liveness of the stack registers, so it is necessary +// to implement exact liveness tracking between basic blocks. The CFG edges are +// partitioned into bundles where the same FP registers must be live in +// identical stack positions. Instructions are inserted at the end of each basic +// block to rearrange the live registers to match the outgoing bundle. // -// In particular, this pass currently barfs on critical edges. Because of this, -// it requires the instruction selector to insert FP_REG_KILL instructions on -// the exits of any basic block that has critical edges going from it, or which -// branch to a critical basic block. -// -// FIXME: this is not implemented yet. The stackifier pass only works on local -// basic blocks. +// This approach avoids splitting critical edges at the potential cost of more +// live register shuffling instructions when critical edges are present. // //===----------------------------------------------------------------------===// @@ -32,6 +27,7 @@ #include "X86.h" #include "X86InstrInfo.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -54,7 +50,12 @@ STATISTIC(NumFP , "Number of floating point instructions"); namespace { struct FPS : public MachineFunctionPass { static char ID; - FPS() : MachineFunctionPass(&ID) {} + FPS() : MachineFunctionPass(ID) { + // This is really only to keep valgrind quiet. + // The logic in isLive() is too much for it. 
+ memset(Stack, 0, sizeof(Stack)); + memset(RegMap, 0, sizeof(RegMap)); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -69,11 +70,71 @@ namespace { private: const TargetInstrInfo *TII; // Machine instruction info. + + // Two CFG edges are related if they leave the same block, or enter the same + // block. The transitive closure of an edge under this relation is a + // LiveBundle. It represents a set of CFG edges where the live FP stack + // registers must be allocated identically in the x87 stack. + // + // A LiveBundle is usually all the edges leaving a block, or all the edges + // entering a block, but it can contain more edges if critical edges are + // present. + // + // The set of live FP registers in a LiveBundle is calculated by bundleCFG, + // but the exact mapping of FP registers to stack slots is fixed later. + struct LiveBundle { + // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. + unsigned Mask; + + // Number of pre-assigned live registers in FixStack. This is 0 when the + // stack order has not yet been fixed. + unsigned FixCount; + + // Assigned stack order for live-in registers. + // FixStack[i] == getStackEntry(i) for all i < FixCount. + unsigned char FixStack[8]; + + LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} + + // Have the live registers been assigned a stack order yet? + bool isFixed() const { return !Mask || FixCount; } + }; + + // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges + // with no live FP registers. + SmallVector<LiveBundle, 8> LiveBundles; + + // Map each MBB in the current function to an (ingoing, outgoing) index into + // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) + // and are not actually stored in the map. + DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; + + // Return a bitmask of FP registers in block's live-in list. + unsigned calcLiveInMask(MachineBasicBlock *MBB) { + unsigned Mask = 0; + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) { + unsigned Reg = *I - X86::FP0; + if (Reg < 8) + Mask |= 1 << Reg; + } + return Mask; + } + + // Partition all the CFG edges into LiveBundles. + void bundleCFG(MachineFunction &MF); + MachineBasicBlock *MBB; // Current basic block unsigned Stack[8]; // FP<n> Registers in each stack slot... unsigned RegMap[8]; // Track which stack slot contains each register unsigned StackTop; // The current top of the FP stack. + // Set up our stack model to match the incoming registers to MBB. + void setupBlockStack(); + + // Shuffle live registers to match the expectations of successor blocks. + void finishBlockStack(); + void dumpStack() const { dbgs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { @@ -82,27 +143,36 @@ namespace { } dbgs() << "\n"; } - private: - /// isStackEmpty - Return true if the FP stack is empty. - bool isStackEmpty() const { - return StackTop == 0; - } - - // getSlot - Return the stack slot number a particular register number is - // in. + + /// getSlot - Return the stack slot number a particular register number is + /// in. unsigned getSlot(unsigned RegNo) const { assert(RegNo < 8 && "Regno out of range!"); return RegMap[RegNo]; } - // getStackEntry - Return the X86::FP<n> register in register ST(i). + /// isLive - Is RegNo currently live in the stack? 
+ bool isLive(unsigned RegNo) const { + unsigned Slot = getSlot(RegNo); + return Slot < StackTop && Stack[Slot] == RegNo; + } + + /// getScratchReg - Return an FP register that is not currently in use. + unsigned getScratchReg() { + for (int i = 7; i >= 0; --i) + if (!isLive(i)) + return i; + llvm_unreachable("Ran out of scratch FP registers"); + } + + /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { assert(STi < StackTop && "Access past stack top!"); return Stack[StackTop-1-STi]; } - // getSTReg - Return the X86::ST(i) register which contains the specified - // FP<RegNo> register. + /// getSTReg - Return the X86::ST(i) register which contains the specified + /// FP<RegNo> register. unsigned getSTReg(unsigned RegNo) const { return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; } @@ -117,10 +187,9 @@ namespace { bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); + DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); if (isAtTop(RegNo)) return; - + unsigned STReg = getSTReg(RegNo); unsigned RegOnTop = getStackEntry(0); @@ -137,24 +206,37 @@ namespace { } void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { - DebugLoc dl = I->getDebugLoc(); + DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); unsigned STReg = getSTReg(RegNo); pushReg(AsReg); // New register on top of stack BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); } - // popStackAfter - Pop the current value off of the top of the FP stack - // after the specified instruction. + /// popStackAfter - Pop the current value off of the top of the FP stack + /// after the specified instruction. void popStackAfter(MachineBasicBlock::iterator &I); - // freeStackSlotAfter - Free the specified register from the register stack, - // so that it is no longer in a register. If the register is currently at - // the top of the stack, we just pop the current instruction, otherwise we - // store the current top-of-stack into the specified slot, then pop the top - // of stack. + /// freeStackSlotAfter - Free the specified register from the register + /// stack, so that it is no longer in a register. If the register is + /// currently at the top of the stack, we just pop the current instruction, + /// otherwise we store the current top-of-stack into the specified slot, + /// then pop the top of stack. void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); + /// freeStackSlotBefore - Just the pop, no folding. Return the inserted + /// instruction. + MachineBasicBlock::iterator + freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); + + /// Adjust the live registers to be the set in Mask. + void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); + + /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is + /// st(0), FP reg FixStack[1] is st(1) etc. + void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, + MachineBasicBlock::iterator I); + bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); void handleZeroArgFP(MachineBasicBlock::iterator &I); @@ -181,7 +263,6 @@ static unsigned getFPReg(const MachineOperand &MO) { return Reg - X86::FP0; } - /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP /// register references into FP stack references.
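The Stack/RegMap pair above is a two-way map: Stack[] records which %FPi occupies each model slot (slot StackTop-1 being st(0)), and RegMap[] is its inverse, so a register is live exactly when the two arrays agree, which is what the new isLive() tests. A minimal standalone model of the same invariant:

#include <cstdio>

unsigned Stack[8], RegMap[8], StackTop = 0;   // zero-initialized globals

void pushReg(unsigned RegNo) {                // model a push of %FP<RegNo>
  Stack[StackTop] = RegNo;
  RegMap[RegNo] = StackTop++;
}

bool isLive(unsigned RegNo) {                 // live iff the two maps agree
  unsigned Slot = RegMap[RegNo];
  return Slot < StackTop && Stack[Slot] == RegNo;
}

unsigned getSTReg(unsigned RegNo) {           // which st(i) holds %FP<RegNo>
  return StackTop - 1 - RegMap[RegNo];
}

int main() {
  pushReg(3); pushReg(1);                     // now st(1)=%FP3, st(0)=%FP1
  std::printf("%d %d st(%u)\n", isLive(3), isLive(0), getSTReg(3));  // 1 0 st(1)
}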
/// @@ -201,6 +282,10 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { if (!FPIsUsed) return false; TII = MF.getTarget().getInstrInfo(); + + // Prepare cross-MBB liveness. + bundleCFG(MF); + StackTop = 0; // Process the function in depth first order so that we process at least one @@ -215,16 +300,111 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { Changed |= processBasicBlock(MF, **I); // Process any unreachable blocks in arbitrary order now. - if (MF.size() == Processed.size()) - return Changed; + if (MF.size() != Processed.size()) + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + if (Processed.insert(BB)) + Changed |= processBasicBlock(MF, *BB); + + BlockBundle.clear(); + LiveBundles.clear(); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - if (Processed.insert(BB)) - Changed |= processBasicBlock(MF, *BB); - return Changed; } +/// bundleCFG - Scan all the basic blocks to determine consistent live-in and +/// live-out sets for the FP registers. Consistent means that the set of +/// registers live-out from a block is identical to the live-in set of all +/// successors. This is not enforced by the normal live-in lists since +/// registers may be implicitly defined, or not used by all successors. +void FPS::bundleCFG(MachineFunction &MF) { + assert(LiveBundles.empty() && "Stale data in LiveBundles"); + assert(BlockBundle.empty() && "Stale data in BlockBundle"); + SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; + + // LiveBundle[0] is the empty live-in set. + LiveBundles.resize(1); + + // First gather the actual live-in masks for all MBBs. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + const unsigned Mask = calcLiveInMask(MBB); + if (!Mask) + continue; + // Ingoing bundle index. + unsigned &Idx = BlockBundle[MBB].first; + // Already assigned an ingoing bundle? + if (Idx) + continue; + // Allocate a new LiveBundle struct for this block's live-ins. + const unsigned BundleIdx = Idx = LiveBundles.size(); + DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" + << MBB->getNumber()); + LiveBundles.push_back(Mask); + LiveBundle &Bundle = LiveBundles.back(); + + // Make sure all predecessors have the same live-out set. + PropUp.insert(MBB); + + // Keep pushing liveness up and down the CFG until convergence. + // Only critical edges cause iteration here, but when they do, multiple + // blocks can be assigned to the same LiveBundle index. + do { + // Assign BundleIdx as liveout from predecessors in PropUp. + for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), + E = PropUp.end(); I != E; ++I) { + MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), + LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { + MachineBasicBlock *PredMBB = *LinkI; + // PredMBB's liveout bundle should be set to LIIdx. + unsigned &Idx = BlockBundle[PredMBB].second; + if (Idx) { + assert(Idx == BundleIdx && "Inconsistent CFG"); + continue; + } + Idx = BundleIdx; + DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); + // Propagate to siblings. + if (PredMBB->succ_size() > 1) + PropDown.insert(PredMBB); + } + } + PropUp.clear(); + + // Assign BundleIdx as livein to successors in PropDown. 
+ for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), + E = PropDown.end(); I != E; ++I) { + MachineBasicBlock *MBB = *I; + for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), + LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { + MachineBasicBlock *SuccMBB = *LinkI; + // LinkMBB's livein bundle should be set to BundleIdx. + unsigned &Idx = BlockBundle[SuccMBB].first; + if (Idx) { + assert(Idx == BundleIdx && "Inconsistent CFG"); + continue; + } + Idx = BundleIdx; + DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); + // Propagate to siblings. + if (SuccMBB->pred_size() > 1) + PropUp.insert(SuccMBB); + // Also accumulate the bundle liveness mask from the liveins here. + Bundle.Mask |= calcLiveInMask(SuccMBB); + } + } + PropDown.clear(); + } while (!PropUp.empty()); + DEBUG({ + dbgs() << " live:"; + for (unsigned i = 0; i < 8; ++i) + if (Bundle.Mask & (1<<i)) + dbgs() << " %FP" << i; + dbgs() << '\n'; + }); + } +} + /// processBasicBlock - Loop over all of the instructions in the basic block, /// transforming FP instructions into their stack form. /// @@ -232,10 +412,12 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; + setupBlockStack(); + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { MachineInstr *MI = I; uint64_t Flags = MI->getDesc().TSFlags; - + unsigned FPInstClass = Flags & X86II::FPTypeMask; if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; @@ -302,10 +484,82 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { Changed = true; } - assert(isStackEmpty() && "Stack not empty at end of basic block?"); + finishBlockStack(); + return Changed; } +/// setupBlockStack - Use the BlockBundle map to set up our model of the stack +/// to match predecessors' live out stack. +void FPS::setupBlockStack() { + DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() + << " derived from " << MBB->getName() << ".\n"); + StackTop = 0; + const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; + + if (!Bundle.Mask) { + DEBUG(dbgs() << "Block has no FP live-ins.\n"); + return; + } + + // Depth-first iteration should ensure that we always have an assigned stack. + assert(Bundle.isFixed() && "Reached block before any predecessors"); + + // Push the fixed live-in registers. + for (unsigned i = Bundle.FixCount; i > 0; --i) { + MBB->addLiveIn(X86::ST0+i-1); + DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" + << unsigned(Bundle.FixStack[i-1]) << '\n'); + pushReg(Bundle.FixStack[i-1]); + } + + // Kill off unwanted live-ins. This can happen with a critical edge. + // FIXME: We could keep these live registers around as zombies. They may need + // to be revived at the end of a short block. It might save a few instrs. + adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); + DEBUG(MBB->dump()); +} + +/// finishBlockStack - Revive live-outs that are implicitly defined out of +/// MBB. Shuffle live registers to match the expected fixed stack of any +/// predecessors, and ensure that all predecessors are expecting the same +/// stack. +void FPS::finishBlockStack() { + // The RET handling below takes care of return blocks for us. 
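Before the body of finishBlockStack continues below, it helps to restate the record it manipulates. LiveBundle is declared earlier in this file, outside this hunk; the sketch here is reconstructed from its uses in setupBlockStack and finishBlockStack, so take the exact field types as assumptions rather than quoted source:

    // Reconstructed sketch of the per-bundle record (not the verbatim decl).
    struct LiveBundle {
      unsigned Mask;             // Bit i set => %FPi is live across the edge.
      unsigned FixCount;         // How many stack slots have a fixed order.
      unsigned char FixStack[8]; // FixStack[i] = FP register expected in st(i).

      LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}

      // The first block to reach finishBlockStack fixes the order for the
      // whole bundle; until then FixCount is 0 even when Mask is nonzero.
      bool isFixed() const { return !Mask || FixCount; }
    };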
+ if (MBB->succ_empty()) + return; + + DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() + << " derived from " << MBB->getName() << ".\n"); + + unsigned BundleIdx = BlockBundle.lookup(MBB).second; + LiveBundle &Bundle = LiveBundles[BundleIdx]; + + // We may need to kill and define some registers to match successors. + // FIXME: This can probably be combined with the shuffle below. + MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); + adjustLiveRegs(Bundle.Mask, Term); + + if (!Bundle.Mask) { + DEBUG(dbgs() << "No live-outs.\n"); + return; + } + + // Has the stack order been fixed yet? + DEBUG(dbgs() << "LB#" << BundleIdx << ": "); + if (Bundle.isFixed()) { + DEBUG(dbgs() << "Shuffling stack to match.\n"); + shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); + } else { + // Not fixed yet, we get to choose. + DEBUG(dbgs() << "Fixing stack order now.\n"); + Bundle.FixCount = StackTop; + for (unsigned i = 0; i < StackTop; ++i) + Bundle.FixStack[i] = getStackEntry(i); + } +} + + //===----------------------------------------------------------------------===// // Efficient Lookup Table Support //===----------------------------------------------------------------------===// @@ -318,7 +572,7 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool operator<(unsigned V, const TableEntry &TE) { + friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) { return V < TE.from; } }; @@ -597,6 +851,13 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { // Otherwise, store the top of stack into the dead slot, killing the operand // without having to add in an explicit xchg then pop. // + I = freeStackSlotBefore(++I, FPRegNo); +} + +/// freeStackSlotBefore - Free the specified register without trying any +/// folding. +MachineBasicBlock::iterator +FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { unsigned STReg = getSTReg(FPRegNo); unsigned OldSlot = getSlot(FPRegNo); unsigned TopReg = Stack[StackTop-1]; @@ -604,9 +865,90 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { RegMap[TopReg] = OldSlot; RegMap[FPRegNo] = ~0; Stack[--StackTop] = ~0; - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); - I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg); + return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); +} + +/// adjustLiveRegs - Kill and revive registers such that exactly the FP +/// registers with a bit in Mask are live. +void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { + unsigned Defs = Mask; + unsigned Kills = 0; + for (unsigned i = 0; i < StackTop; ++i) { + unsigned RegNo = Stack[i]; + if (!(Defs & (1 << RegNo))) + // This register is live, but we don't want it. + Kills |= (1 << RegNo); + else + // We don't need to imp-def this live register. + Defs &= ~(1 << RegNo); + } + assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); + + // Produce implicit-defs for free by using killed registers. + while (Kills && Defs) { + unsigned KReg = CountTrailingZeros_32(Kills); + unsigned DReg = CountTrailingZeros_32(Defs); + DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); + std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); + std::swap(RegMap[KReg], RegMap[DReg]); + Kills &= ~(1 << KReg); + Defs &= ~(1 << DReg); + } + + // Kill registers by popping. 
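A note on the pairing loop just above, before the popping code that follows: every (kill, def) pair it retires costs no instructions at all, because the pass merely renames its own bookkeeping, reinterpreting a register that was about to die as the one that needed an implicit def. Only the leftovers need real pops or LD_F0 loads. A standalone sketch of the mask arithmetic (using a GCC/Clang builtin in place of CountTrailingZeros_32):

    #include <cassert>

    int main() {
      unsigned Kills = 0x5;  // %FP0, %FP2: live but unwanted.
      unsigned Defs  = 0xA;  // %FP1, %FP3: wanted but currently dead.
      unsigned Renames = 0;
      while (Kills && Defs) {
        unsigned KReg = __builtin_ctz(Kills);  // lowest set bit of each mask
        unsigned DReg = __builtin_ctz(Defs);
        Kills &= ~(1u << KReg);
        Defs  &= ~(1u << DReg);
        ++Renames;  // e.g. %FP0 renamed as imp-def %FP1, %FP2 as %FP3
      }
      // Everything paired up: no pops and no zero-loads required here.
      assert(Renames == 2 && Kills == 0 && Defs == 0);
    }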
+ if (Kills && I != MBB->begin()) { + MachineBasicBlock::iterator I2 = llvm::prior(I); + for (;;) { + unsigned KReg = getStackEntry(0); + if (!(Kills & (1 << KReg))) + break; + DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); + popStackAfter(I2); + Kills &= ~(1 << KReg); + } + } + + // Manually kill the rest. + while (Kills) { + unsigned KReg = CountTrailingZeros_32(Kills); + DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); + freeStackSlotBefore(I, KReg); + Kills &= ~(1 << KReg); + } + + // Load zeros for all the imp-defs. + while(Defs) { + unsigned DReg = CountTrailingZeros_32(Defs); + DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); + BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); + pushReg(DReg); + Defs &= ~(1 << DReg); + } + + // Now we should have the correct registers live. + DEBUG(dumpStack()); + assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); +} + +/// shuffleStackTop - emit fxch instructions before I to shuffle the top +/// FixCount entries into the order given by FixStack. +/// FIXME: Is there a better algorithm than insertion sort? +void FPS::shuffleStackTop(const unsigned char *FixStack, + unsigned FixCount, + MachineBasicBlock::iterator I) { + // Move items into place, starting from the desired stack bottom. + while (FixCount--) { + // Old register at position FixCount. + unsigned OldReg = getStackEntry(FixCount); + // Desired register at position FixCount. + unsigned Reg = FixStack[FixCount]; + if (Reg == OldReg) + continue; + // (Reg st0) (OldReg st0) = (Reg OldReg st0) + moveToTop(Reg, I); + moveToTop(OldReg, I); + } + DEBUG(dumpStack()); } @@ -660,7 +1002,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { MI->getOpcode() == X86::ISTT_Fp32m80 || MI->getOpcode() == X86::ISTT_Fp64m80 || MI->getOpcode() == X86::ST_FpP80m)) { - duplicateToTop(Reg, 7 /*temp register*/, I); + duplicateToTop(Reg, getScratchReg(), I); } else { moveToTop(Reg, I); // Move to the top of the stack... } @@ -1013,8 +1355,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { if (!MI->killsRegister(X86::FP0 + Op0)) { // Duplicate Op0 into a temporary on the stack top. - // This actually assumes that FP7 is dead. - duplicateToTop(Op0, 7, I); + duplicateToTop(Op0, getScratchReg(), I); } else { // Op0 is killed, so just swap it into position. moveToTop(Op0, I); @@ -1034,8 +1375,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { ++StackTop; unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). if (!MI->killsRegister(X86::FP0 + Op0)) { - // Assume FP6 is not live, use it as a scratch register. - duplicateToTop(Op0, 6, I); + duplicateToTop(Op0, getScratchReg(), I); moveToTop(RegOnTop, I); } else if (getSTReg(Op0) != X86::ST1) { // We have the wrong value at st(1). Shuffle! Untested! @@ -1119,11 +1459,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and // the second one in ST(1). - if (isStackEmpty()) return; // Quick check to see if any are possible. - + // Find the register operands. 
unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; - + unsigned LiveMask = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) @@ -1142,12 +1482,18 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { assert(SecondFPRegOp == ~0U && "More than two fp operands!"); SecondFPRegOp = getFPReg(Op); } + LiveMask |= (1 << getFPReg(Op)); // Remove the operand so that later passes don't see it. MI->RemoveOperand(i); --i, --e; } - + + // We may have been carrying spurious live-ins, so make sure only the returned + // registers are left live. + adjustLiveRegs(LiveMask, MI); + if (!LiveMask) return; // Quick check to see if any are possible. + // There are only four possibilities here: // 1) we are returning a single FP value. In this case, it has to be in // ST(0) already, so just declare success by removing the value from the @@ -1173,7 +1519,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Duplicate the TOS so that we return it twice. Just pick some other FPx // register to hold it. - unsigned NewReg = (FirstFPRegOp+1)%7; + unsigned NewReg = getScratchReg(); duplicateToTop(FirstFPRegOp, NewReg, MI); FirstFPRegOp = NewReg; } @@ -1197,7 +1543,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } I = MBB->erase(I); // Remove the pseudo instruction - --I; + + // We want to leave I pointing to the previous instruction, but what if we + // just erased the first instruction? + if (I == MBB->begin()) { + DEBUG(dbgs() << "Inserting dummy KILL\n"); + I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)); + } else + --I; } // Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp deleted file mode 100644 index 2c98b96c510b9..0000000000000 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ /dev/null @@ -1,153 +0,0 @@ -//===-- X86FloatingPoint.cpp - FP_REG_KILL inserter -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the pass which inserts FP_REG_KILL instructions. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "x86-codegen" -#include "X86.h" -#include "X86InstrInfo.h" -#include "llvm/Instructions.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/CFG.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added"); - -namespace { - struct FPRegKiller : public MachineFunctionPass { - static char ID; - FPRegKiller() : MachineFunctionPass(&ID) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "X86 FP_REG_KILL inserter"; - } - }; - char FPRegKiller::ID = 0; -} - -FunctionPass *llvm::createX87FPRegKillInserterPass() { - return new FPRegKiller(); -} - -/// isFPStackVReg - Return true if the specified vreg is from a fp stack -/// register class. -static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { - if (!TargetRegisterInfo::isVirtualRegister(RegNo)) - return false; - - switch (MRI.getRegClass(RegNo)->getID()) { - default: return false; - case X86::RFP32RegClassID: - case X86::RFP64RegClassID: - case X86::RFP80RegClassID: - return true; - } -} - - -/// ContainsFPStackCode - Return true if the specific MBB has floating point -/// stack code, and thus needs an FP_REG_KILL. -static bool ContainsFPStackCode(MachineBasicBlock *MBB, - const MachineRegisterInfo &MRI) { - // Scan the block, looking for instructions that define or use fp stack vregs. - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (!I->getOperand(op).isReg()) - continue; - if (unsigned Reg = I->getOperand(op).getReg()) - if (isFPStackVReg(Reg, MRI)) - return true; - } - } - - // Check PHI nodes in successor blocks. These PHI's will be lowered to have - // a copy of the input value in this block, which is a definition of the - // value. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++ SI) { - MachineBasicBlock *SuccBB = *SI; - for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end(); - I != E; ++I) { - // All PHI nodes are at the top of the block. - if (!I->isPHI()) break; - - if (isFPStackVReg(I->getOperand(0).getReg(), MRI)) - return true; - } - } - - return false; -} - -bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { - // If we are emitting FP stack code, scan the basic block to determine if this - // block defines or uses any FP values. If so, put an FP_REG_KILL instruction - // before the terminator of the block. - - // Note that FP stack instructions are used in all modes for long double, - // so we always need to do this check. - // Also note that it's possible for an FP stack register to be live across - // an instruction that produces multiple basic blocks (SSE CMOV) so we - // must check all the generated basic blocks. - - // Scan all of the machine instructions in these MBBs, checking for FP - // stores. 
(RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.) - - // Fast-path: If nothing is using the x87 registers, we don't need to do - // any scanning. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() && - MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() && - MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty()) - return false; - - bool Changed = false; - MachineFunction::iterator MBBI = MF.begin(); - MachineFunction::iterator EndMBB = MF.end(); - for (; MBBI != EndMBB; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - - // If this block returns, ignore it. We don't want to insert an FP_REG_KILL - // before the return. - if (!MBB->empty()) { - MachineBasicBlock::iterator EndI = MBB->end(); - --EndI; - if (EndI->getDesc().isReturn()) - continue; - } - - // If we find any FP stack code, emit the FP_REG_KILL instruction. - if (ContainsFPStackCode(MBB, MRI)) { - BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(), - MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL)); - ++NumFPKill; - Changed = true; - } - } - - return Changed; -} diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 72f2bc11d7cc7..c5234413aba63 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -171,6 +171,17 @@ namespace { virtual void PreprocessISelDAG(); + inline bool immSext8(SDNode *N) const { + return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue()); + } + + // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit + // sign extended field. + inline bool i64immSExt32(SDNode *N) const { + uint64_t v = cast<ConstantSDNode>(N)->getZExtValue(); + return (int64_t)v == (int32_t)v; + } + // Include the pieces autogenerated from the target description. 
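The #include that follows pulls in the TableGen-generated matcher; before it, a note on the two predicates defined just above (hoisted out of the generated Predicate_* functions). The i64immSExt32 check is a cast round-trip: the right-hand cast truncates v to 32 bits and the comparison sign-extends it back to 64, which is the identity exactly when v is representable as a sign-extended 32-bit field.

    #include <cassert>
    #include <stdint.h>

    static bool i64immSExt32(uint64_t v) { return (int64_t)v == (int32_t)v; }

    int main() {
      assert(i64immSExt32(UINT64_C(0x000000007FFFFFFF)));  // INT32_MAX: fits
      assert(i64immSExt32(UINT64_C(0xFFFFFFFF80000000)));  // INT32_MIN: fits
      assert(!i64immSExt32(UINT64_C(0x0000000080000000))); // 2^31: does not
    }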
#include "X86GenDAGISel.inc" @@ -1312,13 +1323,6 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -static SDNode *FindCallStartFromCall(SDNode *Node) { - if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - return FindCallStartFromCall(Node->getOperand(0).getNode()); -} - SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue Chain = Node->getOperand(0); SDValue In1 = Node->getOperand(1); @@ -1403,7 +1407,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1411,7 +1415,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1426,7 +1430,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1434,7 +1438,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1450,17 +1454,17 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; - else if (Predicate_i64immSExt32(Val.getNode())) + else if (i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; } } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_immSext8(Val.getNode())) + if (immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; - else if (Predicate_i64immSExt32(Val.getNode())) + else if (i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; } } @@ -1841,7 +1845,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. - if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b3c48862898ff..95dbb61766874 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,6 +16,7 @@ #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" +#include "X86ShuffleDecode.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" @@ -343,8 +344,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - if (!Subtarget->hasSSE2()) - setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // We may not have a libcall for MEMBARRIER so we should lower this. 
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); + // On X86 and X86-64, atomic operations are lowered to locked instructions. // Locked instructions, in turn, have implicit fence semantics (all memory // operations are flushed before issuing the locked instruction, and they @@ -837,6 +839,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); + // Can turn SHL into an integer multiply. + setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v16i8, Custom); + // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. f32 vectors are // custom since the immediate controlling the insert encodes additional @@ -866,6 +872,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v8i32, Legal); @@ -877,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); - //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); @@ -1189,6 +1196,50 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } +std::pair<const TargetRegisterClass*, uint8_t> +X86TargetLowering::findRepresentativeClass(EVT VT) const{ + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: + RRC = (Subtarget->is64Bit() + ? X86::GR64RegisterClass : X86::GR32RegisterClass); + break; + case MVT::v8i8: case MVT::v4i16: + case MVT::v2i32: case MVT::v1i64: + RRC = X86::VR64RegisterClass; + break; + case MVT::f32: case MVT::f64: + case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v4f32: case MVT::v2f64: + case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: + case MVT::v4f64: + RRC = X86::VR128RegisterClass; + break; + } + return std::make_pair(RRC, Cost); +} + +unsigned +X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0; + switch (RC->getID()) { + default: + return 0; + case X86::GR32RegClassID: + return 4 - FPDiff; + case X86::GR64RegClassID: + return 8 - FPDiff; + case X86::VR128RegClassID: + return Subtarget->is64Bit() ? 
10 : 4; + case X86::VR64RegClassID: + return 4; + } +} + bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { if (!Subtarget->isTargetLinux()) @@ -1259,6 +1310,19 @@ X86TargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue ValToCopy = OutVals[i]; + EVT ValVT = ValToCopy.getValueType(); + + // If this is x86-64, and we disabled SSE, we can't return FP values + if ((ValVT == MVT::f32 || ValVT == MVT::f64) && + (Subtarget->is64Bit() && !Subtarget->hasSSE1())) { + report_fatal_error("SSE register return with SSE disabled"); + } + // Likewise we can't return F64 values with SSE1 only. gcc does so, but + // llvm-gcc has never done it right and no one has noticed, so this + // should be OK for now. + if (ValVT == MVT::f64 && + (Subtarget->is64Bit() && !Subtarget->hasSSE2())) + report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1276,14 +1340,20 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - EVT ValVT = ValToCopy.getValueType(); if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); - if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) - ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); + if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { + ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, + ValToCopy); + + // If we don't have SSE2 available, convert to v4f32 so the generated + // register is legal. + if (!Subtarget->hasSSE2()) + ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy); + } } } - + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); } @@ -1570,6 +1640,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = X86::FR32RegisterClass; else if (RegVT == MVT::f64) RC = X86::FR64RegisterClass; + else if (RegVT.isVector() && RegVT.getSizeInBits() == 256) + RC = X86::VR256RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) RC = X86::VR128RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) @@ -1937,6 +2009,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (isVarArg && Subtarget->isTargetWin64()) { + // Win64 ABI requires argument XMM reg to be copied to the corresponding + // shadow reg if callee is a varargs function. 
+        unsigned ShadowReg = 0;
+        switch (VA.getLocReg()) {
+        case X86::XMM0: ShadowReg = X86::RCX; break;
+        case X86::XMM1: ShadowReg = X86::RDX; break;
+        case X86::XMM2: ShadowReg = X86::R8; break;
+        case X86::XMM3: ShadowReg = X86::R9; break;
+        }
+        if (ShadowReg)
+          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+      }
     } else if (!IsSibcall && (!isTailCall || isByVal)) {
       assert(VA.isMemLoc());
       if (StackPtr.getNode() == 0)
@@ -1990,7 +2075,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     }
   }

-  if (Is64Bit && isVarArg) {
+  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
     // From AMD64 ABI document:
     // For calls that may call functions that use varargs or stdargs
     // (prototype-less calls or calls to functions containing ellipsis (...) in
     // the declaration) %al is used as hidden argument to specify the number
     // of SSE registers used. The contents of %al do not need to match exactly
     // the number of registers, but must be an upper bound on the number of SSE
     // registers used and is in the range 0 - 8 inclusive.

-    // FIXME: Verify this on Win64
     // Count the number of XMM registers allocated.
     static const unsigned XMMArgRegs[] = {
       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -2165,8 +2249,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (!isTailCall && Subtarget->isPICStyleGOT())
     Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

-  // Add an implicit use of AL for x86 vararg functions.
-  if (Is64Bit && isVarArg)
+  // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
     Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));

   if (InFlag.getNode())
@@ -2356,8 +2440,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   if (RegInfo->needsStackRealignment(MF))
     return false;

-  // Do not sibcall optimize vararg calls unless the call site is not passing any
-  // arguments.
+  // Do not sibcall optimize vararg calls unless the call site is not passing
+  // any arguments.
if (isVarArg && !Outs.empty()) return false; @@ -2493,6 +2577,112 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { // Other Lowering Hooks //===----------------------------------------------------------------------===// +static bool MayFoldLoad(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); +} + +static bool MayFoldIntoStore(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); +} + +static bool isTargetShuffle(unsigned Opcode) { + switch(Opcode) { + default: return false; + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + case X86ISD::SHUFPD: + case X86ISD::SHUFPS: + case X86ISD::MOVLHPS: + case X86ISD::MOVLHPD: + case X86ISD::MOVHLPS: + case X86ISD::MOVLPS: + case X86ISD::MOVLPD: + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + case X86ISD::MOVSS: + case X86ISD::MOVSD: + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + return true; + } + return false; +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + return DAG.getNode(Opc, dl, VT, V1); + } + + return SDValue(); +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); + } + + return SDValue(); +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::SHUFPD: + case X86ISD::SHUFPS: + return DAG.getNode(Opc, dl, VT, V1, V2, + DAG.getConstant(TargetMask, MVT::i8)); + } + return SDValue(); +} + +static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue V1, SDValue V2, SelectionDAG &DAG) { + switch(Opc) { + default: llvm_unreachable("Unknown x86 shuffle node"); + case X86ISD::MOVLHPS: + case X86ISD::MOVLHPD: + case X86ISD::MOVHLPS: + case X86ISD::MOVLPS: + case X86ISD::MOVLPD: + case X86ISD::MOVSS: + case X86ISD::MOVSD: + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + return DAG.getNode(Opc, dl, VT, V1, V2); + } + return SDValue(); +} SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -3347,18 +3537,27 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest - // type. This ensures they get CSE'd. + // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // to their dest type. 
This ensures they get CSE'd. SDValue Vec; if (VT.getSizeInBits() == 64) { // MMX SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - } else if (HasSSE2) { // SSE2 - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - } else { // SSE1 + } else if (VT.getSizeInBits() == 128) { + if (HasSSE2) { // SSE2 + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + } else { // SSE1 + SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); + } + } else if (VT.getSizeInBits() == 256) { // AVX + // 256-bit logic and arithmetic instructions in AVX are + // all floating-point, no support for integer ops. Default + // to emitting fp zeroed vectors then. SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); } return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -3372,9 +3571,9 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (VT.getSizeInBits() == 64) // MMX + if (VT.getSizeInBits() == 64) // MMX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else // SSE + else // SSE Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -3439,9 +3638,8 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, - bool HasSSE2) { +/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32. +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { if (SV->getValueType(0).getVectorNumElements() <= 4) return SDValue(SV, 0); @@ -3488,68 +3686,253 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } -/// getNumOfConsecutiveZeros - Return the number of elements in a result of -/// a shuffle that is zero. -static -unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, - bool Low, SelectionDAG &DAG) { - unsigned NumZeros = 0; - for (int i = 0; i < NumElems; ++i) { - unsigned Index = Low ? i : NumElems-i-1; - int Idx = SVOp->getMaskElt(Index); - if (Idx < 0) { - ++NumZeros; - continue; - } - SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); - if (Elt.getNode() && X86::isZeroNode(Elt)) - ++NumZeros; - else +/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// element of the result of the vector shuffle. +SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, + unsigned Depth) { + if (Depth == 6) + return SDValue(); // Limit search depth. + + SDValue V = SDValue(N, 0); + EVT VT = V.getValueType(); + unsigned Opcode = V.getOpcode(); + + // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. 
+  if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+    Index = SV->getMaskElt(Index);
+
+    if (Index < 0)
+      return DAG.getUNDEF(VT.getVectorElementType());
+
+    int NumElems = VT.getVectorNumElements();
+    SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+    return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+  }
+
+  // Recurse into target specific vector shuffles to find scalars.
+  if (isTargetShuffle(Opcode)) {
+    int NumElems = VT.getVectorNumElements();
+    SmallVector<unsigned, 16> ShuffleMask;
+    SDValue ImmN;
+
+    switch(Opcode) {
+    case X86ISD::SHUFPS:
+    case X86ISD::SHUFPD:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodeSHUFPSMask(NumElems,
+                       cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                       ShuffleMask);
+      break;
+    case X86ISD::PUNPCKHBW:
+    case X86ISD::PUNPCKHWD:
+    case X86ISD::PUNPCKHDQ:
+    case X86ISD::PUNPCKHQDQ:
+      DecodePUNPCKHMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::UNPCKHPS:
+    case X86ISD::UNPCKHPD:
+      DecodeUNPCKHPMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::PUNPCKLBW:
+    case X86ISD::PUNPCKLWD:
+    case X86ISD::PUNPCKLDQ:
+    case X86ISD::PUNPCKLQDQ:
+      DecodePUNPCKLMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::UNPCKLPS:
+    case X86ISD::UNPCKLPD:
+      DecodeUNPCKLPMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::MOVHLPS:
+      DecodeMOVHLPSMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::MOVLHPS:
+      DecodeMOVLHPSMask(NumElems, ShuffleMask);
+      break;
+    case X86ISD::PSHUFD:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodePSHUFMask(NumElems,
+                      cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                      ShuffleMask);
+      break;
+    case X86ISD::PSHUFHW:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+      break;
+    case X86ISD::PSHUFLW:
+      ImmN = N->getOperand(N->getNumOperands()-1);
+      DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+                        ShuffleMask);
+      break;
+    case X86ISD::MOVSS:
+    case X86ISD::MOVSD: {
+      // The index 0 always comes from the first element of the second source,
+      // this is why MOVSS and MOVSD are used in the first place. The other
+      // elements come from the other positions of the first source vector.
+      unsigned OpNum = (Index == 0) ? 1 : 0;
+      return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+                                 Depth+1);
+    }
+    default:
+      assert(0 && "not implemented for target shuffle node");
+      return SDValue();
+    }
+
+    Index = ShuffleMask[Index];
+    if (Index < 0)
+      return DAG.getUNDEF(VT.getVectorElementType());
+
+    SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+    return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+                               Depth+1);
   }
-  return NumZeros;
-}

-/// isVectorShift - Returns true if the shuffle can be implemented as a
-/// logical left or right shift of a vector.
-/// FIXME: split into pslldqi, psrldqi, palignr variants.
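The Decode* helpers referenced above live in the new X86ShuffleDecode.h pulled in at the top of this file. For the immediate-driven shuffles the decode is plain bit slicing: each result lane takes a 2-bit source-lane selector from the 8-bit immediate. A sketch of the 4-lane PSHUFD case, mirroring what DecodePSHUFMask is expected to compute (the sketch's function name is invented for illustration):

    #include <cassert>
    #include <vector>

    // Two bits of the immediate per result element, low bits first.
    static std::vector<unsigned> decodePShufD4(unsigned Imm) {
      std::vector<unsigned> Mask;
      for (unsigned i = 0; i != 4; ++i)
        Mask.push_back((Imm >> (2 * i)) & 0x3);
      return Mask;
    }

    int main() {
      // 0x1B == 0b00011011 selects <3,2,1,0>, i.e. a full lane reversal.
      std::vector<unsigned> M = decodePShufD4(0x1B);
      assert(M[0] == 3 && M[1] == 2 && M[2] == 1 && M[3] == 0);
    }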
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
-                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
-  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+  // Actual nodes that may contain scalar elements
+  if (Opcode == ISD::BIT_CONVERT) {
+    V = V.getOperand(0);
+    EVT SrcVT = V.getValueType();
+    unsigned NumElems = VT.getVectorNumElements();
-  isLeft = true;
-  unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
-  if (!NumZeros) {
-    isLeft = false;
-    NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
-    if (!NumZeros)
-      return false;
+    if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+      return SDValue();
+  }
+
+  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    return (Index == 0) ? V.getOperand(0)
+                        : DAG.getUNDEF(VT.getVectorElementType());
+
+  if (V.getOpcode() == ISD::BUILD_VECTOR)
+    return V.getOperand(Index);
+
+  return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation which consecutively come from zero. The search can
+/// start in two different directions, from left or right.
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+                                  bool ZerosFromLeft, SelectionDAG &DAG) {
+  int i = 0;
+
+  while (i < NumElems) {
+    unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+    SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+    if (!(Elt.getNode() &&
+         (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+      break;
+    ++i;
   }
+
+  return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also tell OpNum which source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+                              int OpIdx, int NumElems, unsigned &OpNum) {
   bool SeenV1 = false;
   bool SeenV2 = false;
-  for (unsigned i = NumZeros; i < NumElems; ++i) {
-    unsigned Val = isLeft ? (i - NumZeros) : i;
-    int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
-    if (Idx_ < 0)
+
+  for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+    int Idx = SVOp->getMaskElt(i);
+    // Ignore undef indices
+    if (Idx < 0)
       continue;
-    unsigned Idx = (unsigned) Idx_;
+
     if (Idx < NumElems)
       SeenV1 = true;
-    else {
-      Idx -= NumElems;
+    else
       SeenV2 = true;
-    }
-    if (Idx != Val)
+
+    // Only accept consecutive elements from the same vector
+    if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
       return false;
   }
-  if (SeenV1 && SeenV2)
+
+  OpNum = SeenV1 ? 0 : 1;
+  return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical right shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+                               bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+  unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+              false /* check zeros from right */, DAG);
+  unsigned OpSrc;
+
+  if (!NumZeros)
+    return false;
+
+  // Considering the elements in the mask that are not consecutive zeros,
+  // check if they consecutively come from only one of the source vectors.
+ // + // V1 = {X, A, B, C} 0 + // \ \ \ / + // vector_shuffle V1, V2 <1, 2, 3, X> + // + if (!isShuffleMaskConsecutive(SVOp, + 0, // Mask Start Index + NumElems-NumZeros-1, // Mask End Index + NumZeros, // Where to start looking in the src vector + NumElems, // Number of elements in vector + OpSrc)) // Which source operand ? + return false; + + isLeft = false; + ShAmt = NumZeros; + ShVal = SVOp->getOperand(OpSrc); + return true; +} + +/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a +/// logical left shift of a vector. +static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, + bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, + true /* check zeros from left */, DAG); + unsigned OpSrc; + + if (!NumZeros) + return false; + + // Considering the elements in the mask that are not consecutive zeros, + // check if they consecutively come from only one of the source vectors. + // + // 0 { A, B, X, X } = V2 + // / \ / / + // vector_shuffle V1, V2 <X, X, 4, 5> + // + if (!isShuffleMaskConsecutive(SVOp, + NumZeros, // Mask Start Index + NumElems-1, // Mask End Index + 0, // Where to start looking in the src vector + NumElems, // Number of elements in vector + OpSrc)) // Which source operand ? return false; - ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1); + isLeft = true; ShAmt = NumZeros; + ShVal = SVOp->getOperand(OpSrc); return true; } +/// isVectorShift - Returns true if the shuffle can be implemented as a +/// logical left or right shift of a vector. +static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, + bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { + if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || + isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt)) + return true; + + return false; +} /// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. /// @@ -3779,9 +4162,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // All zero's are handled with pxor, all one's are handled with pcmpeqd. - if (ISD::isBuildVectorAllZeros(Op.getNode()) - || ISD::isBuildVectorAllOnes(Op.getNode())) { + // All zero's are handled with pxor in SSE2 and above, xorps in SSE1. + // All one's are handled with pcmpeqd. In AVX, zero's are handled with + // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd + // is present, so AllOnes is ignored. + if (ISD::isBuildVectorAllZeros(Op.getNode()) || + (Op.getValueType().getSizeInBits() != 256 && + ISD::isBuildVectorAllOnes(Op.getNode()))) { // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts. @@ -3819,10 +4206,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } } - if (NumNonZero == 0) { - // All undef vector. Return an UNDEF. All zero vectors were handled above. + // All undef vector. Return an UNDEF. All zero vectors were handled above. + if (NumNonZero == 0) return DAG.getUNDEF(VT); - } // Special case for single non-zero, non-undef, element. 
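Tying the two recognizers together before the single-element special case below: once a mask passes isVectorShiftLeft or isVectorShiftRight, the whole shuffle is one PSLLDQ/PSRLDQ whose byte count is NumZeros times the element size. A sanity check on plain arrays for the <zero, zero, 4, 5> example pictured above (illustrative; assumes a little-endian host, as on x86):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main() {
      uint32_t V2[4] = {10, 11, 12, 13};
      uint32_t Shuf[4] = {0, 0, 10, 11};  // vector_shuffle V1, V2 <X, X, 4, 5>
      uint8_t Bytes[16] = {0};            // PSLLDQ view: move bytes upward
      memcpy(Bytes + 8, V2, 8);           // shift left by 2 elts * 4 bytes = 8
      assert(memcmp(Bytes, Shuf, 16) == 0);
    }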
if (NumNonZero == 1) { @@ -3960,7 +4346,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (EVTBits == 16 && NumElems == 8) { SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - *this); + *this); if (V.getNode()) return V; } @@ -4014,28 +4400,51 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (LD.getNode()) return LD; - // For SSE 4.1, use inserts into undef. + // For SSE 4.1, use insertps to put the high elements into the low element. if (getSubtarget()->hasSSE41()) { - V[0] = DAG.getUNDEF(VT); - for (unsigned i = 0; i < NumElems; ++i) - if (Op.getOperand(i).getOpcode() != ISD::UNDEF) - V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0], + SDValue Result; + if (Op.getOperand(0).getOpcode() != ISD::UNDEF) + Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); + else + Result = DAG.getUNDEF(VT); + + for (unsigned i = 1; i < NumElems; ++i) { + if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue; + Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, Op.getOperand(i), DAG.getIntPtrConstant(i)); - return V[0]; + } + return Result; } - // Otherwise, expand into a number of unpckl* - // e.g. for v4f32 + // Otherwise, expand into a number of unpckl*, start by extending each of + // our (non-undef) elements to the full vector width with the element in the + // bottom slot of the vector (which generates no code for SSE). + for (unsigned i = 0; i < NumElems; ++i) { + if (Op.getOperand(i).getOpcode() != ISD::UNDEF) + V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); + else + V[i] = DAG.getUNDEF(VT); + } + + // Next, we iteratively mix elements, e.g. for v4f32: // Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0> // : unpcklps 1, 3 ==> Y: <?, ?, 3, 1> // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); - NumElems >>= 1; - while (NumElems != 0) { - for (unsigned i = 0; i < NumElems; ++i) - V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]); - NumElems >>= 1; + unsigned EltStride = NumElems >> 1; + while (EltStride != 0) { + for (unsigned i = 0; i < EltStride; ++i) { + // If V[i+EltStride] is undef and this is the first round of mixing, + // then it is safe to just drop this shuffle: V[i] is already in the + // right place, the one element (since it's the first round) being + // inserted as undef can be dropped. This isn't safe for successive + // rounds because they will permute elements within both vectors. + if (V[i+EltStride].getOpcode() == ISD::UNDEF && + EltStride == NumElems/2) + continue; + + V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]); + } + EltStride >>= 1; } return V[0]; } @@ -4074,10 +4483,10 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // 2. [ssse3] 1 x pshufb // 3. [ssse3] 2 x pshufb + 1 x por // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) -static -SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, - const X86TargetLowering &TLI) { +SDValue +X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, + SelectionDAG &DAG) const { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4128,7 +4537,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // quads, disable the next transformation since it does not help SSSE3. 
bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (TLI.getSubtarget()->hasSSSE3()) { + if (Subtarget->hasSSSE3()) { if (InputQuads.count() == 2 && V1Used && V2Used) { BestLoQuad = InputQuads.find_first(); BestHiQuad = InputQuads.find_next(BestLoQuad); @@ -4187,15 +4596,21 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW; + unsigned TargetMask = 0; + NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); + TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()): + X86::getShufflePSHUFLWImmediate(NewV.getNode()); + V1 = NewV.getOperand(0); + return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG); } } // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. - if (TLI.getSubtarget()->hasSSSE3()) { + if (Subtarget->hasSSSE3()) { SmallVector<SDValue,16> pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -4262,6 +4677,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, MaskV.push_back(i); NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); + + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) + NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, + NewV.getOperand(0), + X86::getShufflePSHUFLWImmediate(NewV.getNode()), + DAG); } // If BestHi >= 0, generate a pshufhw to put the high elements in order, @@ -4284,6 +4705,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, } NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); + + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) + NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, + NewV.getOperand(0), + X86::getShufflePSHUFHWImmediate(NewV.getNode()), + DAG); } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -4473,7 +4900,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SDValue V2 = SVOp->getOperand(1); unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; - EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); + EVT MaskVT = (NewWidth == 4) ? 
MVT::v4i16 : MVT::v2i32;
   EVT NewVT = MaskVT;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(false && "Unexpected!");
@@ -4697,6 +5124,129 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
   return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
 }

+static bool MayFoldVectorLoad(SDValue V) {
+  if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+    V = V.getOperand(0);
+  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    V = V.getOperand(0);
+  if (MayFoldLoad(V))
+    return true;
+  return false;
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+                        bool HasSSE2) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+  if (HasSSE2 && VT == MVT::v2f64)
+    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+  // v4f32 or v4i32
+  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+         "unsupported shuffle type");
+
+  if (V2.getOpcode() == ISD::UNDEF)
+    V2 = V1;
+
+  // v4i32 or v4f32
+  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+  unsigned NumElems = VT.getVectorNumElements();
+
+  // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+  // operand of these instructions is only memory, so check if there's a
+  // potential load folding here, otherwise use SHUFPS or MOVSD to match the
+  // same masks.
+  bool CanFoldLoad = false;
+
+  // Trivial case, when V2 comes from a load.
+  if (MayFoldVectorLoad(V2))
+    CanFoldLoad = true;
+
+  // When V1 is a load, it can be folded later into a store in isel, example:
+  // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+  //    turns into:
+  // (MOVLPSmr addr:$src1, VR128:$src2)
+  // So, recognize this potential and also use MOVLPS or MOVLPD
+  if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+    CanFoldLoad = true;
+
+  if (CanFoldLoad) {
+    if (HasSSE2 && NumElems == 2)
+      return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+    if (NumElems == 4)
+      return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+  }
+
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  // movl and movlp will both match v2i64, but v2i64 is never matched by
+  // movl earlier because we make it strict to avoid messing with the movlp load
+  // folding logic (see the code above getMOVLP call). Match it here then,
+  // this is horrible, but will stay like this until we move all shuffle
+  // matching to x86 specific nodes. Note that for the 1st condition all
+  // types are matched with movsd.
+  if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+    return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+  else if (HasSSE2)
+    return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+
+  assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+  // Invert the operand order and use SHUFPS to match it.
+  return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+                              X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT) {
+  switch(VT.getSimpleVT().SimpleTy) {
+  case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+  case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+  case MVT::v4f32: return X86ISD::UNPCKLPS;
+  case MVT::v2f64: return X86ISD::UNPCKLPD;
+  case MVT::v16i8: return X86ISD::PUNPCKLBW;
+  case MVT::v8i16: return X86ISD::PUNPCKLWD;
+  default:
+    llvm_unreachable("Unknown type for unpckl");
+  }
+  return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+  switch(VT.getSimpleVT().SimpleTy) {
+  case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+  case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+  case MVT::v4f32: return X86ISD::UNPCKHPS;
+  case MVT::v2f64: return X86ISD::UNPCKHPD;
+  case MVT::v16i8: return X86ISD::PUNPCKHBW;
+  case MVT::v8i16: return X86ISD::PUNPCKHWD;
+  default:
+    llvm_unreachable("Unknown type for unpckh");
+  }
+  return 0;
+}
+
 SDValue
 X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -4710,6 +5260,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
   bool V1IsSplat = false;
   bool V2IsSplat = false;
+  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);

   if (isZeroShuffle(SVOp))
     return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
@@ -4718,7 +5272,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   if (SVOp->isSplat()) {
     if (isMMX || NumElems < 4)
       return Op;
-    return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+    return PromoteSplat(SVOp, DAG);
   }

   // If the shuffle can be profitably rewritten as a narrower shuffle, then
@@ -4746,8 +5300,35 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     }
   }

-  if (X86::isPSHUFDMask(SVOp))
-    return Op;
+  // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+  // unpckh_undef). Only use pshufd if speed is more important than size.
+  if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+  if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+    if (VT != MVT::v2i64 && VT != MVT::v2f64)
+      return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+  if (X86::isPSHUFDMask(SVOp)) {
+    // The actual implementation will match the mask in the if above and then
+    // during isel it can match several different instructions, not only pshufd
+    // as its name says, sad but true, emulate the behavior for now...
+    if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+      return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+    unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+    if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+      return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+    if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+      return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+                                  TargetMask, DAG);
+
+    if (VT == MVT::v4f32)
+      return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+                                  TargetMask, DAG);
+  }

   // Check if this can be converted into a logical shift.
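Before the logical-shift check continues below, a note on the mask shape the unpck helpers above are selected for: UNPCKL interleaves the low halves of its two sources, so with N lanes the shuffle mask is <0, N, 1, N+1, ...>, and getUNPCKLOpcode merely picks the instruction flavor matching the element type. A sketch of the mask itself (not of the X86::isUNPCKLMask predicate):

    #include <cassert>
    #include <vector>

    // Result lane 2*i comes from V1[i]; lane 2*i+1 from V2[i] (index N+i).
    static std::vector<int> unpcklMask(int NumElts) {
      std::vector<int> M;
      for (int i = 0; i != NumElts / 2; ++i) {
        M.push_back(i);
        M.push_back(NumElts + i);
      }
      return M;
    }

    int main() {
      std::vector<int> M = unpcklMask(4);  // the v4f32/v4i32 case
      int Expect[4] = {0, 4, 1, 5};        // matched by UNPCKLPS / PUNPCKLDQ
      for (int i = 0; i != 4; ++i)
        assert(M[i] == Expect[i]);
    }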
bool isLeft = false; @@ -4768,17 +5349,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!isMMX) - return Op; + if (!isMMX && !X86::isMOVLPMask(SVOp)) { + if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) + return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); + + if (VT == MVT::v4i32 || VT == MVT::v4f32) + return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG); + } } // FIXME: fold these into legal mask. - if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || - X86::isMOVSLDUPMask(SVOp) || - X86::isMOVHLPSMask(SVOp) || - X86::isMOVLHPSMask(SVOp) || - X86::isMOVLPMask(SVOp))) - return Op; + if (!isMMX) { + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + + if (X86::isMOVHLPSMask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); + + if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); + + if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); + + if (X86::isMOVLPMask(SVOp)) + return getMOVLP(Op, dl, DAG, HasSSE2); + } if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -4818,11 +5414,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(SVOp) || - X86::isUNPCKH_v_undef_Mask(SVOp) || - X86::isUNPCKLMask(SVOp) || - X86::isUNPCKHMask(SVOp)) - return Op; + if (X86::isUNPCKLMask(SVOp)) + return (isMMX) ? + Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + + if (X86::isUNPCKHMask(SVOp)) + return (isMMX) ? + Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -4844,11 +5442,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // FIXME: this seems wrong. SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || - X86::isUNPCKH_v_undef_Mask(NewSVOp) || - X86::isUNPCKLMask(NewSVOp) || - X86::isUNPCKHMask(NewSVOp)) - return NewOp; + + if (X86::isUNPCKLMask(NewSVOp)) + return (isMMX) ? + NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + + if (X86::isUNPCKHMask(NewSVOp)) + return (isMMX) ? + NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. @@ -4857,15 +5458,52 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) return CommuteVectorShuffle(SVOp, DAG); - // Check for legal shuffle and return? - SmallVector<int, 16> PermMask; - SVOp->getMask(PermMask); - if (isShuffleMaskLegal(PermMask, VT)) + // The checks below are all present in isShuffleMaskLegal, but they are + // inlined here right now to enable us to directly emit target-specific + // nodes, and remove them one by one until they no longer return Op. + SmallVector<int, 16> M; + SVOp->getMask(M); + + // Very little shuffling can be done for 64-bit vectors right now. + if (VT.getSizeInBits() == 64) + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ?
Op : SDValue(); + + // FIXME: pshufb, blends, shifts. + if (VT.getVectorNumElements() == 2 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3())) return Op; + if (isPSHUFHWMask(M, VT)) + return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, + X86::getShufflePSHUFHWImmediate(SVOp), + DAG); + + if (isPSHUFLWMask(M, VT)) + return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1, + X86::getShufflePSHUFLWImmediate(SVOp), + DAG); + + if (isSHUFPMask(M, VT)) { + unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); + if (VT == MVT::v4f32 || VT == MVT::v4i32) + return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2, + TargetMask, DAG); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2, + TargetMask, DAG); + } + + if (X86::isUNPCKL_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isUNPCKH_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { - SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); + SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG); if (NewOp.getNode()) return NewOp; } @@ -6922,24 +7560,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // ptest intrinsics. The intrinsic these come from are designed to return - // an integer value, not just an instruction so lower it to the ptest - // pattern and a setcc for the result. + // ptest and testp intrinsics. The intrinsics these come from are designed to + // return an integer value, not just an instruction, so lower it to the ptest + // or testp pattern and a setcc for the result.
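Before the case labels that follow, it helps to pin down what these test instructions compute: ptest (and the AVX testp forms, which look only at the packed sign bits) set just ZF and CF, with ZF = ((a AND b) == 0) and CF = ((NOT a AND b) == 0); the *z, *c and *nzc intrinsic variants then materialize COND_E, COND_B and COND_A respectively. A scalar model of the flag computation (plain C++, not LLVM code):

    #include <cassert>
    #include <cstdint>

    struct Flags { bool ZF, CF; };

    // Scalar model of the ptest flag semantics on two packed sources.
    static Flags ptest(uint64_t a, uint64_t b) {
      Flags F = { (a & b) == 0, (~a & b) == 0 };
      return F;
    }

    int main() {
      Flags F = ptest(0x0F, 0xF0);
      assert(F.ZF && !F.CF);    // testz taken: no bits in common
      F = ptest(0xFF, 0x0F);
      assert(!F.ZF && F.CF);    // testc taken: b's bits all within a
      F = ptest(0x0F, 0xFF);
      assert(!F.ZF && !F.CF);   // testnzc taken: ZF == 0 && CF == 0
    }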
case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestc: - case Intrinsic::x86_sse41_ptestnzc:{ + case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_avx_ptestz_256: + case Intrinsic::x86_avx_ptestc_256: + case Intrinsic::x86_avx_ptestnzc_256: + case Intrinsic::x86_avx_vtestz_ps: + case Intrinsic::x86_avx_vtestc_ps: + case Intrinsic::x86_avx_vtestnzc_ps: + case Intrinsic::x86_avx_vtestz_pd: + case Intrinsic::x86_avx_vtestc_pd: + case Intrinsic::x86_avx_vtestnzc_pd: + case Intrinsic::x86_avx_vtestz_ps_256: + case Intrinsic::x86_avx_vtestc_ps_256: + case Intrinsic::x86_avx_vtestnzc_ps_256: + case Intrinsic::x86_avx_vtestz_pd_256: + case Intrinsic::x86_avx_vtestc_pd_256: + case Intrinsic::x86_avx_vtestnzc_pd_256: { + bool IsTestPacked = false; unsigned X86CC = 0; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + case Intrinsic::x86_avx_vtestz_ps: + case Intrinsic::x86_avx_vtestz_pd: + case Intrinsic::x86_avx_vtestz_ps_256: + case Intrinsic::x86_avx_vtestz_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_avx_ptestz_256: // ZF = 1 X86CC = X86::COND_E; break; + case Intrinsic::x86_avx_vtestc_ps: + case Intrinsic::x86_avx_vtestc_pd: + case Intrinsic::x86_avx_vtestc_ps_256: + case Intrinsic::x86_avx_vtestc_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_avx_ptestc_256: // CF = 1 X86CC = X86::COND_B; break; + case Intrinsic::x86_avx_vtestnzc_ps: + case Intrinsic::x86_avx_vtestnzc_pd: + case Intrinsic::x86_avx_vtestnzc_ps_256: + case Intrinsic::x86_avx_vtestnzc_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_avx_ptestnzc_256: // ZF and CF = 0 X86CC = X86::COND_A; break; @@ -6947,7 +7619,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); + unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST; + SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); SDValue CC = DAG.getConstant(X86CC, MVT::i8); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); @@ -7110,12 +7783,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP, - getPointerTy()); + SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + Subtarget->is64Bit() ? X86::RBP : X86::EBP, + getPointerTy()); unsigned StoreAddrReg = (Subtarget->is64Bit() ? 
X86::RCX : X86::ECX); - SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(-TD->getPointerSize())); + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, + DAG.getIntPtrConstant(TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); @@ -7218,7 +7892,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - report_fatal_error("Nest register in use - reduce number of inreg parameters!"); + report_fatal_error("Nest register in use - reduce number of inreg" + " parameters!"); } } break; @@ -7439,6 +8114,86 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { return Res; } +SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + + LLVMContext *Context = DAG.getContext(); + + assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later"); + + if (VT == MVT::v4i32) { + Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), + Op.getOperand(1), DAG.getConstant(23, MVT::i32)); + + ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U)); + + std::vector<Constant*> CV(4, CI); + Constant *C = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); + + Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op); + Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); + return DAG.getNode(ISD::MUL, dl, VT, Op, R); + } + if (VT == MVT::v16i8) { + // a = a << 5; + Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), + Op.getOperand(1), DAG.getConstant(5, MVT::i32)); + + ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15)); + ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63)); + + std::vector<Constant*> CVM1(16, CM1); + std::vector<Constant*> CVM2(16, CM2); + Constant *C = ConstantVector::get(CVM1); + SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); + + // r = pblendv(r, psllw(r & (char16)15, 4), a); + M = DAG.getNode(ISD::AND, dl, VT, R, M); + M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, + DAG.getConstant(4, MVT::i32)); + R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R, M, Op); + // a += a + Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + + C = ConstantVector::get(CVM2); + CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + PseudoSourceValue::getConstantPool(), 0, false, false, 16); + + // r = pblendv(r, psllw(r & (char16)63, 2), a); + M = DAG.getNode(ISD::AND, dl, VT, R, M); + M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, + DAG.getConstant(2, MVT::i32)); + R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + 
DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R, M, Op); + // a += a + Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + + // return pblendv(r, r+r, a); + R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); + return R; + } + return SDValue(); +} SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus @@ -7508,6 +8263,50 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return Sum; } +SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ + DebugLoc dl = Op.getDebugLoc(); + + if (!Subtarget->hasSSE2()) { + SDValue Chain = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, + Subtarget->is64Bit() ? MVT::i64 : MVT::i32); + SDValue Ops[] = { + DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(0, MVT::i32), // Disp + DAG.getRegister(0, MVT::i32), // Segment. + Zero, + Chain + }; + SDNode *Res = + DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, + array_lengthof(Ops)); + return SDValue(Res, 0); + } + + unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); + if (!isDev) + return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + + unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + + // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; + if (!Op1 && !Op2 && !Op3 && Op4) + return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); + + // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; + if (Op1 && !Op2 && !Op3 && !Op4) + return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); + + // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), + // (MFENCE)>; + return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); +} + SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -7597,6 +8396,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -7640,6 +8440,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); case ISD::MUL: return LowerMUL_V2I64(Op, DAG); + case ISD::SHL: return LowerSHL(Op, DAG); case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: @@ -7852,6 +8653,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::AND: return "X86ISD::AND"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; + case X86ISD::TESTP: return "X86ISD::TESTP"; + case 
X86ISD::PALIGN: return "X86ISD::PALIGN"; + case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; + case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; + case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD"; + case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; + case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD"; + case X86ISD::SHUFPS: return "X86ISD::SHUFPS"; + case X86ISD::SHUFPD: return "X86ISD::SHUFPD"; + case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; + case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; + case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; + case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD"; + case X86ISD::MOVLPS: return "X86ISD::MOVLPS"; + case X86ISD::MOVLPD: return "X86ISD::MOVLPD"; + case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; + case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; + case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; + case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD"; + case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; + case X86ISD::MOVSD: return "X86ISD::MOVSD"; + case X86ISD::MOVSS: return "X86ISD::MOVSS"; + case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; + case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; + case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; + case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; + case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; + case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; + case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; + case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; + case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; + case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } @@ -7863,6 +8698,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // X86 supports extremely general addressing modes. CodeModel::Model M = getTargetMachine().getCodeModel(); + Reloc::Model R = getTargetMachine().getRelocationModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL)) @@ -7882,7 +8718,8 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, return false; // If lower 4G is not available, then we must use rip-relative addressing. - if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) + if ((M != CodeModel::Small || R != Reloc::Static) && + Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) return false; } @@ -8368,19 +9205,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, } // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 -// all of this code can be replaced with that in the .td file. +// or XMM0_V32I8 in AVX all of this code can be replaced with that +// in the .td file. MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { + assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && + "Target must have SSE4.2 or AVX features enabled"); + DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); unsigned Opc; - if (memArg) - Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; - else - Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + + if (!Subtarget->hasAVX()) { + if (memArg) + Opc = numArgs == 3 ? 
X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; + else + Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + } else { + if (memArg) + Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm; + else + Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; + } MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); @@ -8562,7 +9411,8 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::ESP, RegState::Define | RegState::Implicit); + .addReg(X86::ESP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -8579,6 +9429,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); + bool IsWin64 = Subtarget->isTargetWin64(); assert(MI->getOperand(3).isGlobal() && "This should be a global"); @@ -8590,7 +9441,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); - MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); + MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m)); addDirectMem(MIB, X86::RDI); } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, @@ -8727,12 +9578,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } // String/text processing lowering. case X86::PCMPISTRM128REG: + case X86::VPCMPISTRM128REG: return EmitPCMP(MI, BB, 3, false /* in-mem */); case X86::PCMPISTRM128MEM: + case X86::VPCMPISTRM128MEM: return EmitPCMP(MI, BB, 3, true /* in-mem */); case X86::PCMPESTRM128REG: + case X86::VPCMPESTRM128REG: return EmitPCMP(MI, BB, 5, false /* in mem */); case X86::PCMPESTRM128MEM: + case X86::VPCMPESTRM128MEM: return EmitPCMP(MI, BB, 5, true /* in mem */); // Atomic Lowering. @@ -8966,21 +9821,20 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (VT.getSizeInBits() != 128) return SDValue(); SmallVector<SDValue, 16> Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) - Elts.push_back(DAG.getShuffleScalarElt(SVN, i)); - + Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); + return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } -/// PerformShuffleCombine - Detect vector gather/scatter index generation -/// and convert it from being a bunch of shuffles and extracts to a simple -/// store and scalar loads to extract the elements. +/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index +/// generation and convert it from being a bunch of shuffles and extracts +/// to a simple store and scalar loads to extract the elements. static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { SDValue InputVector = N->getOperand(0); @@ -9030,8 +9884,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Store the value to a temporary stack slot. 
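For the extract-element combine in progress here: rather than shuffling and re-extracting each lane, the vector is spilled once to a stack temporary and every use becomes a cheap scalar load at offset EltSize * Idx. A self-contained model of that addressing, with memcpy standing in for the store and the loads (plain C++, not the DAG code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // "Vector" value and a stack temporary, as in the combine below.
      uint32_t vec[4] = {11, 22, 33, 44};
      unsigned char slot[sizeof vec];
      std::memcpy(slot, vec, sizeof vec);       // one store of the vector

      // Each extract becomes a scalar load at offset = EltSize * Idx.
      for (unsigned idx = 0; idx < 4; ++idx) {
        uint32_t elt;
        std::memcpy(&elt, slot + sizeof(uint32_t) * idx, sizeof elt);
        assert(elt == vec[idx]);
      }
    }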
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, + 0, false, false, 0); // Replace each use (extract) with a load of the appropriate element. for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(), @@ -9045,11 +9899,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue(); SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); - SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr); + SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), + OffsetVal, StackPtr); // Load the scalar. - SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr, - NULL, 0, false, false, 0); + SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, + ScalarAddr, NULL, 0, false, false, 0); // Replace the extract with the load. DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar); @@ -9087,8 +9942,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { if (!UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; @@ -9126,8 +9980,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle NaNs incorrectly, and swapping // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { if (!UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; @@ -9156,8 +10009,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // cause it to handle NaNs incorrectly. if (!UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } @@ -9182,8 +10034,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, case ISD::SETULT: // Converting this to a max would handle NaNs incorrectly. - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; Opcode = X86ISD::FMAX; break; @@ -9193,8 +10044,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // cause it to handle NaNs incorrectly.
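The NaN and signed-zero caveats repeated through this combine follow from the SSE semantics: minss/minps compute "a < b ? a : b", so any comparison involving NaN is false and the second operand comes back, and min(+0, -0) differs from min(-0, +0). A scalar demonstration of why the operands cannot be swapped freely (the ternary below is exactly the documented minss behavior):

    #include <cassert>
    #include <cmath>
    #include <limits>

    // minss(a, b) = a < b ? a : b.  The comparison is false for NaN and
    // for +0.0 < -0.0 (they compare equal), so b is returned in both cases.
    static float minss(float a, float b) { return a < b ? a : b; }

    int main() {
      const float nan = std::numeric_limits<float>::quiet_NaN();

      assert(minss(nan, 1.0f) == 1.0f);            // NaN in a: returns b
      assert(std::isnan(minss(1.0f, nan)));        // NaN in b: returns NaN
      assert(std::signbit(minss(+0.0f, -0.0f)));   // returns -0.0
      assert(!std::signbit(minss(-0.0f, +0.0f)));  // returns +0.0
    }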
if (!UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { - if (!FiniteOnlyFPMath() && - (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; std::swap(LHS, RHS); } @@ -9905,7 +10755,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; - case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); case ISD::EXTRACT_VECTOR_ELT: return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); @@ -9922,6 +10771,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case X86ISD::SHUFPS: // Handle all target specific shuffles + case X86ISD::SHUFPD: + case X86ISD::PUNPCKHBW: + case X86ISD::PUNPCKHWD: + case X86ISD::PUNPCKHDQ: + case X86ISD::PUNPCKHQDQ: + case X86ISD::UNPCKHPS: + case X86ISD::UNPCKHPD: + case X86ISD::PUNPCKLBW: + case X86ISD::PUNPCKLWD: + case X86ISD::PUNPCKLDQ: + case X86ISD::PUNPCKLQDQ: + case X86ISD::UNPCKLPS: + case X86ISD::UNPCKLPD: + case X86ISD::MOVHLPS: + case X86ISD::MOVLHPS: + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + case X86ISD::MOVSS: + case X86ISD::MOVSD: + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); } return SDValue(); @@ -9956,14 +10827,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { } } -static bool MayFoldLoad(SDValue Op) { - return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); -} - -static bool MayFoldIntoStore(SDValue Op) { - return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); -} - /// IsDesirableToPromoteOp - This method query the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4e4daa4bc5ca9..d2d9b28a03967 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -248,6 +248,44 @@ namespace llvm { // PTEST - Vector bitwise comparisons PTEST, + // TESTP - Vector packed fp sign bitwise comparisons + TESTP, + + // Several flavors of instructions with vector shuffle behaviors. + PALIGN, + PSHUFD, + PSHUFHW, + PSHUFLW, + PSHUFHW_LD, + PSHUFLW_LD, + SHUFPD, + SHUFPS, + MOVDDUP, + MOVSHDUP, + MOVSLDUP, + MOVSHDUP_LD, + MOVSLDUP_LD, + MOVLHPS, + MOVLHPD, + MOVHLPS, + MOVHLPD, + MOVLPS, + MOVLPD, + MOVSD, + MOVSS, + UNPCKLPS, + UNPCKLPD, + UNPCKHPS, + UNPCKHPD, + PUNPCKLBW, + PUNPCKLWD, + PUNPCKLDQ, + PUNPCKLQDQ, + PUNPCKHBW, + PUNPCKHWD, + PUNPCKHDQ, + PUNPCKHQDQ, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -265,7 +303,13 @@ namespace llvm { ATOMXOR64_DAG, ATOMAND64_DAG, ATOMNAND64_DAG, - ATOMSWAP64_DAG + ATOMSWAP64_DAG, + + // Memory barrier + MEMBARRIER, + MFENCE, + SFENCE, + LFENCE // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be @@ -584,12 +628,19 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address /// space, and populates the address space and offset as /// appropriate. virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; + protected: + std::pair<const TargetRegisterClass*, uint8_t> + findRepresentativeClass(EVT VT) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -710,11 +761,16 @@ namespace llvm { SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; + + // Utility functions to help LowerVECTOR_SHUFFLE + SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 42d0e7f9778ab..0884b61425e9e 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -73,11 +73,7 @@ def GetLo32XForm : SDNodeXForm<imm, [{ return getI32Imm((unsigned)N->getZExtValue()); }]>; -def i64immSExt32 : PatLeaf<(i64 imm), [{ - // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); -}]>; +def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; def i64immZExt32 : PatLeaf<(i64 imm), [{ @@ -158,7 +154,7 @@ let isCall = 1 in // FIXME: We need to teach codegen about single list of call-clobbered // registers. -let isCall = 1 in +let isCall = 1, isCodeGenOnly = 1 in // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. 
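On the i64immSExt32 change above (the predicate body now forwards to a C++ helper of the same name): it accepts exactly the immediates that survive truncation to i32 followed by sign extension, i.e. those encodable in x86-64's sign-extended 32-bit immediate fields. A standalone check of that round trip (plain C++):

    #include <cassert>
    #include <cstdint>

    // True iff the immediate fits x86-64's sign-extended 32-bit field:
    // truncating to i32 and sign-extending back must be lossless.
    static bool fitsSExt32(int64_t Imm) {
      return (int64_t)(int32_t)Imm == Imm;
    }

    int main() {
      assert(fitsSExt32(-1));            // all-ones sign-extends fine
      assert(fitsSExt32(INT32_MIN));
      assert(!fitsSExt32(1LL << 31));    // 0x80000000 needs zero-extension
      assert(!fitsSExt32(1LL << 40));
    }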
Uses for argument @@ -168,7 +164,7 @@ let isCall = 1 in MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], Uses = [RSP] in { - def WINCALL64pcrel32 : I<0xE8, RawFrm, + def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call\t$dst", []>, Requires<[IsWin64]>; @@ -182,7 +178,8 @@ let isCall = 1 in } -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, @@ -216,9 +213,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), "jmp{q}\t$dst", []>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)]>; + [(brind GR64:$dst)]>, Requires<[In64BitMode]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))]>; + [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), "ljmp{q}\t{*}$dst", []>; } @@ -246,7 +243,7 @@ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, - (outs), (ins), "leave", []>; + (outs), (ins), "leave", []>, Requires<[In64BitMode]>; let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { let mayLoad = 1 in { def POP64r : I<0x58, AddRegFrm, @@ -330,7 +327,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; // Fast system-call instructions def SYSEXIT64 : RI<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB; + (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Move Instructions... @@ -374,6 +371,7 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store i64immSExt32:$src, addr:$dst)]>; /// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC. +let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; @@ -388,7 +386,13 @@ let mayStore = 1 in def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; +} +// FIXME: These definitions are utterly broken +// Just leave them commented out for now because they're useless outside +// of the large code model, and most compilers won't generate the instructions +// in question. 
+/* def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), "mov{q}\t{$src, %rax|%rax, $src}", []>; def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), @@ -397,6 +401,7 @@ def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{q}\t{%rax, $dst|$dst, %rax}", []>; def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +*/ // Moves to and from segment registers def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), @@ -1316,14 +1321,13 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), [] >, TB; -def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), +def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB, - REX_W; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. -def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), +def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))]>, TB; @@ -1537,116 +1541,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C64r)>; //===----------------------------------------------------------------------===// -// Conversion Instructions... -// - -// f64 -> signed i64 -def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>; -def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>; -def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse2_cvtsd2si64 VR128:$src))]>; -def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), - (ins f128mem:$src), - "cvtsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (int_x86_sse2_cvtsd2si64 - (load addr:$src)))]>; -def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint FR64:$src))]>; -def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>; -def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse2_cvttsd2si64 VR128:$src))]>; -def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), - (ins f128mem:$src), - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse2_cvttsd2si64 - (load addr:$src)))]>; - -// Signed i64 -> f64 -def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp GR64:$src))]>; -def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), - "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { -def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), - "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - 
(int_x86_sse2_cvtsi642sd VR128:$src1, - GR64:$src2))]>; -def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), - "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (int_x86_sse2_cvtsi642sd VR128:$src1, - (loadi64 addr:$src2)))]>; -} // Constraints = "$src1 = $dst" - -// Signed i64 -> f32 -def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src), - "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp GR64:$src))]>; -def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src), - "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { - def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), - "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (int_x86_sse_cvtsi642ss VR128:$src1, - GR64:$src2))]>; - def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, i64mem:$src2), - "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (int_x86_sse_cvtsi642ss VR128:$src1, - (loadi64 addr:$src2)))]>; -} // Constraints = "$src1 = $dst" - -// f32 -> signed i64 -def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>; -def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>; -def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse_cvtss2si64 VR128:$src))]>; -def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (int_x86_sse_cvtss2si64 - (load addr:$src)))]>; -def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint FR32:$src))]>; -def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>; -def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse_cvttss2si64 VR128:$src))]>; -def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), - (ins f32mem:$src), - "cvttss2si{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, - (int_x86_sse_cvttss2si64 (load addr:$src)))]>; - // Descriptor-table support instructions // LLDT is not interpreted specially in 64-bit mode because there is no sign @@ -1726,6 +1620,14 @@ def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), // Atomic Instructions //===----------------------------------------------------------------------===// +// TODO: Get this to fold the constant into the instruction. +let hasSideEffects = 1, Defs = [ESP] in +def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), + "lock\n\t" + "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", + [(X86MemBarrierNoSSE GR64:$zero)]>, + Requires<[In64BitMode]>, LOCK; + let Defs = [RAX, EFLAGS], Uses = [RAX] in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), "lock\n\t" @@ -1772,7 +1674,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), // Optimized codegen when the non-memory output is not used. 
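The Int_MemBarrierNoSSE64 definition above is the 64-bit counterpart of the no-SSE2 path in LowerMEMBARRIER earlier in this change: a locked or of zero into the top of the stack serves as a full barrier when no fence instruction is available. When fences are available, the lowering picks one from the membarrier operands; a sketch of that selection table, naming the four ordering operands only by position as the def : Pat comments do (their precise load/store meanings are an assumption here):

    #include <cassert>

    enum Fence { SFENCE, LFENCE, MFENCE };

    // Mirrors the device-barrier selection in LowerMEMBARRIER: only the
    // fourth operand set -> sfence, only the first -> lfence, anything
    // else -> the full mfence.
    static Fence pickFence(bool Op1, bool Op2, bool Op3, bool Op4) {
      if (!Op1 && !Op2 && !Op3 && Op4) return SFENCE;
      if (Op1 && !Op2 && !Op3 && !Op4) return LFENCE;
      return MFENCE;
    }

    int main() {
      assert(pickFence(false, false, false, true) == SFENCE);
      assert(pickFence(true, false, false, false) == LFENCE);
      assert(pickFence(true, true, true, true) == MFENCE);
    }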
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. -def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), +def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td new file mode 100644 index 0000000000000..d868773d2d690 --- /dev/null +++ b/lib/Target/X86/X86InstrFMA.td @@ -0,0 +1,60 @@ +//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes FMA (Fused Multiply-Add) instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FMA3 - Intel 3 operand Fused Multiply-Add instructions +//===----------------------------------------------------------------------===// + +multiclass fma_rm<bits<8> opc, string OpcodeStr> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} + +multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpcodeStr, string PackTy> { + defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; + defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; + defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; +} + +let isAsmParserOnly = 1 in { + // Fused Multiply-Add + defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; + defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; + defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; + defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; + defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; + defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; + defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; + defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; + + // Fused Negative Multiply-Add + defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; + defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; + defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; + defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; +} diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index da93de988d506..9c9bcc7d0b6a0 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -108,10 +108,6 @@ let usesCustomInserter = 1 in { // Expanded 
after instruction selection. [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>; } -let isTerminator = 1 in - let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in - def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>; - // All FP Stack operations are represented with four instructions here. The // first three instructions, generated by the instruction selector, use "RFP32" // "RFP64" or "RFP80" registers: traditional register files to reference 32-bit, @@ -157,7 +153,7 @@ def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR } -// FpIf32, FpIf64 - Floating Point Psuedo Instruction template. +// FpIf32, FpIf64 - Floating Point Pseudo Instruction template. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. // f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2. // f80 instructions cannot use SSE and use neither of these. diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index cc3fdf1efd7b6..79187e9a76d70 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -39,6 +39,7 @@ def MRM_E8 : Format<39>; def MRM_F0 : Format<40>; def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; +def RawFrmImm16 : Format<43>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -210,7 +211,7 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> : I<o, F, outs, ins, asm, []> {} -// FpI_ - Floating Point Psuedo Instruction template. Not Predicated. +// FpI_ - Floating Point Pseudo Instruction template. Not Predicated. class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> : X86Inst<0, Pseudo, NoImm, outs, ins, ""> { let FPForm = fp; @@ -224,13 +225,13 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> // Iseg32 - 16-bit segment selector, 32-bit offset class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> { + list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { let Pattern = pattern; let CodeSize = 3; } class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> { + list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> { let Pattern = pattern; let CodeSize = 3; } @@ -411,6 +412,20 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, Requires<[HasSSE42]>; +// AVX Instruction Templates: +// Instructions introduced in AVX (no SSE equivalent forms) +// +// AVX8I - AVX instructions with T8 and OpSize prefix. +// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. 
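Looking back at the FMA3 multiclasses in the new X86InstrFMA.td above: the 132/213/231 digits encode which sources are multiplied and which is added, with operand 1 always the destination (vfmadd132: dst = dst*src3 + src2; vfmadd213: dst = src2*dst + src3; vfmadd231: dst = src2*src3 + dst). The defs are assembler-only for now (isAsmParserOnly = 1, no patterns), so the semantics stated here are the commonly documented ones rather than anything in this patch; a scalar check of the three orderings:

    #include <cassert>
    #include <cmath>

    int main() {
      double a = 2, b = 3, c = 5;        // a = op1 (dst), b = op2, c = op3
      assert(std::fma(a, c, b) == 13.0); // 132 form: a*c + b
      assert(std::fma(b, a, c) == 11.0); // 213 form: b*a + c
      assert(std::fma(b, c, a) == 17.0); // 231 form: b*c + a
    }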
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, + Requires<[HasAVX]>; +class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, + Requires<[HasAVX]>; + // AES Instruction Templates: // // AES8I @@ -425,6 +440,18 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, Requires<[HasAES]>; +// CLMUL Instruction Templates +class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; + +// FMA3 Instruction Templates +class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + OpSize, VEX_4V, Requires<[HasFMA3]>; + // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 71c4e8bc147fa..01149b699213d 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -117,9 +117,67 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, - SDTCisVT<1, v4f32>, - SDTCisVT<2, v4f32>]>; + SDTCisVec<1>, + SDTCisSameAs<2, 1>]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; +def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; + +// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get +// translated into one of the target nodes below during lowering. +// Note: this is a work in progress... 
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; + +def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisInt<2>]>; +def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>]>; + +def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, + SDTCisInt<2>]>; + +def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; + +def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; +def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; +def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; + +def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>; +def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>; + +def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>; +def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>; + +def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; +def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; +def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>; + +def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>; +def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>; + +def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; +def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; +def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; +def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; + +def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; +def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; + +def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; +def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; +def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; + +def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; +def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; +def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; +def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; + +def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; +def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; +def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; +def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; //===----------------------------------------------------------------------===// // SSE Complex Patterns @@ -148,12 +206,13 @@ def sdmem : Operand<v2f64> { // SSE pattern fragments //===----------------------------------------------------------------------===// +// 128-bit load pattern fragments def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; -// FIXME: move this to a more appropriate place after all AVX is done. 
+// 256-bit load pattern fragments def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; @@ -174,6 +233,8 @@ def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; + +// 128-bit aligned load pattern fragments def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; def alignedloadv2f64 : PatFrag<(ops node:$ptr), @@ -183,7 +244,7 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr), def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; -// FIXME: move this to a more appropriate place after all AVX is done. +// 256-bit aligned load pattern fragments def alignedloadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (alignedload node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), @@ -206,15 +267,20 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; + +// 128-bit memop pattern fragments def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; -// FIXME: move this to a more appropriate place after all AVX is done. +// 256-bit memop pattern fragments +def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; +def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; +def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. 
@@ -254,6 +320,7 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), return false; }]>; +// 128-bit bitconvert pattern fragments def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; @@ -261,6 +328,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; +// 256-bit bitconvert pattern fragments +def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; + def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ce471eadd78b2..5280940cf437b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -235,6 +235,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::BT64ri8, X86::BT64mi8, 1, 0 }, { X86::CALL32r, X86::CALL32m, 1, 0 }, { X86::CALL64r, X86::CALL64m, 1, 0 }, + { X86::WINCALL64r, X86::WINCALL64m, 1, 0 }, { X86::CMP16ri, X86::CMP16mi, 1, 0 }, { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, { X86::CMP16rr, X86::CMP16mr, 1, 0 }, @@ -667,46 +668,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?"); } -bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const { - switch (MI.getOpcode()) { - default: - return false; - case X86::MOV8rr: - case X86::MOV8rr_NOREX: - case X86::MOV16rr: - case X86::MOV32rr: - case X86::MOV64rr: - case X86::MOV32rr_TC: - case X86::MOV64rr_TC: - - // FP Stack register class copies - case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: - case X86::MOV_Fp3264: case X86::MOV_Fp3280: - case X86::MOV_Fp6432: case X86::MOV_Fp8032: - - // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 - // copies are done with FsMOVAPSrr and FsMOVAPDrr. 
- - case X86::FsMOVAPSrr: - case X86::FsMOVAPDrr: - case X86::MOVAPSrr: - case X86::MOVAPDrr: - case X86::MOVDQArr: - case X86::MMX_MOVQ64rr: - assert(MI.getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "invalid register-register move instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SrcSubIdx = MI.getOperand(1).getSubReg(); - DstSubIdx = MI.getOperand(0).getSubReg(); - return true; - } -} - bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, @@ -827,7 +788,7 @@ static bool isFrameStoreOpcode(int Opcode) { unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) - if (isFrameOperand(MI, 1, FrameIndex)) + if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) return MI->getOperand(0).getReg(); return 0; } @@ -866,7 +827,8 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) - if (isFrameOperand(MI, 0, FrameIndex)) + if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 && + isFrameOperand(MI, 0, FrameIndex)) return MI->getOperand(X86::AddrNumOperands).getReg(); return 0; } @@ -1664,14 +1626,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { return !isPredicated(MI); } -// For purposes of branch analysis do not count FP_REG_KILL as a terminator. -static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI, - const X86InstrInfo &TII) { - if (MI->getOpcode() == X86::FP_REG_KILL) - return false; - return TII.isUnpredicatedTerminator(MI); -} - bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -1688,7 +1642,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Working from the bottom, when we see a non-terminator instruction, we're // done. - if (!isBrAnalysisUnpredicatedTerminator(I, *this)) + if (!isUnpredicatedTerminator(I)) break; // A terminator that isn't a branch can't easily be handled by this @@ -1891,6 +1845,33 @@ static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); } +// Try and copy between VR128/VR64 and GR64 registers. +static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { + // SrcReg(VR128) -> DestReg(GR64) + // SrcReg(VR64) -> DestReg(GR64) + // SrcReg(GR64) -> DestReg(VR128) + // SrcReg(GR64) -> DestReg(VR64) + + if (X86::GR64RegClass.contains(DestReg)) { + if (X86::VR128RegClass.contains(SrcReg)) { + // Copy from a VR128 register to a GR64 register. + return X86::MOVPQIto64rr; + } else if (X86::VR64RegClass.contains(SrcReg)) { + // Copy from a VR64 register to a GR64 register. + return X86::MOVSDto64rr; + } + } else if (X86::GR64RegClass.contains(SrcReg)) { + // Copy from a GR64 register to a VR128 register. + if (X86::VR128RegClass.contains(DestReg)) + return X86::MOV64toPQIrr; + // Copy from a GR64 register to a VR64 register. 
+ else if (X86::VR64RegClass.contains(DestReg)) + return X86::MOV64toSDrr; + } + + return 0; +} + void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -1915,6 +1896,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = X86::MOVAPSrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; + else + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -2046,6 +2029,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); + assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && + "Stack slot too small for store"); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2130,8 +2115,9 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - &X86::VR128RegClass, &RI); + RC, &RI); } } @@ -2161,8 +2147,9 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (!X86::VR128RegClass.contains(Reg) && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - &X86::VR128RegClass, &RI); + RC, &RI); } } return true; @@ -2423,10 +2410,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { + case X86::AVX_SET0PSY: + case X86::AVX_SET0PDY: + Alignment = 32; + break; case X86::V_SET0PS: case X86::V_SET0PD: case X86::V_SET0PI: case X86::V_SETALLONES: + case X86::AVX_SET0PS: + case X86::AVX_SET0PD: + case X86::AVX_SET0PI: Alignment = 16; break; case X86::FsFLD0SD: @@ -2453,12 +2447,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } else if (Ops.size() != 1) return NULL; + // Make sure the subregisters match. + // Otherwise we risk changing the size of the load. + if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) + return NULL; + SmallVector<MachineOperand,X86::AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { case X86::V_SET0PS: case X86::V_SET0PD: case X86::V_SET0PI: case X86::V_SETALLONES: + case X86::AVX_SET0PS: + case X86::AVX_SET0PD: + case X86::AVX_SET0PI: + case X86::AVX_SET0PSY: + case X86::AVX_SET0PDY: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. @@ -2485,10 +2489,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry. 
MachineConstantPool &MCP = *MF.getConstantPool(); const Type *Ty; - if (LoadMI->getOpcode() == X86::FsFLD0SS) + unsigned Opc = LoadMI->getOpcode(); + if (Opc == X86::FsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (LoadMI->getOpcode() == X86::FsFLD0SD) + else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); + else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) + Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? @@ -2991,561 +2998,6 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { return false; } - -/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 -/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand -/// size, and 3) use of X86-64 extended registers. -unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { - unsigned REX = 0; - const TargetInstrDesc &Desc = MI.getDesc(); - - // Pseudo instructions do not need REX prefix byte. - if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) - return 0; - if (Desc.TSFlags & X86II::REX_W) - REX |= 1 << 3; - - unsigned NumOps = Desc.getNumOperands(); - if (NumOps) { - bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; - - // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. - unsigned i = isTwoAddr ? 1 : 0; - for (unsigned e = NumOps; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (isX86_64NonExtLowByteReg(Reg)) - REX |= 0x40; - } - } - - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= (1 << 0) | (1 << 2); - break; - case X86II::MRMSrcReg: { - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= 1 << 2; - i = isTwoAddr ? 2 : 1; - for (unsigned e = NumOps; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (isX86_64ExtendedReg(MO)) - REX |= 1 << 0; - } - break; - } - case X86II::MRMSrcMem: { - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= 1 << 2; - unsigned Bit = 0; - i = isTwoAddr ? 2 : 1; - for (; i != NumOps; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (MO.isReg()) { - if (isX86_64ExtendedReg(MO)) - REX |= 1 << Bit; - Bit++; - } - } - break; - } - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); - i = isTwoAddr ? 1 : 0; - if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) - REX |= 1 << 2; - unsigned Bit = 0; - for (; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (MO.isReg()) { - if (isX86_64ExtendedReg(MO)) - REX |= 1 << Bit; - Bit++; - } - } - break; - } - default: { - if (isX86_64ExtendedReg(MI.getOperand(0))) - REX |= 1 << 0; - i = isTwoAddr ? 
2 : 1; - for (unsigned e = NumOps; i != e; ++i) { - const MachineOperand& MO = MI.getOperand(i); - if (isX86_64ExtendedReg(MO)) - REX |= 1 << 2; - } - break; - } - } - } - return REX; -} - -/// sizePCRelativeBlockAddress - This method returns the size of a PC -/// relative block address instruction -/// -static unsigned sizePCRelativeBlockAddress() { - return 4; -} - -/// sizeGlobalAddress - Give the size of the emission of this global address -/// -static unsigned sizeGlobalAddress(bool dword) { - return dword ? 8 : 4; -} - -/// sizeConstPoolAddress - Give the size of the emission of this constant -/// pool address -/// -static unsigned sizeConstPoolAddress(bool dword) { - return dword ? 8 : 4; -} - -/// sizeExternalSymbolAddress - Give the size of the emission of this external -/// symbol -/// -static unsigned sizeExternalSymbolAddress(bool dword) { - return dword ? 8 : 4; -} - -/// sizeJumpTableAddress - Give the size of the emission of this jump -/// table address -/// -static unsigned sizeJumpTableAddress(bool dword) { - return dword ? 8 : 4; -} - -static unsigned sizeConstant(unsigned Size) { - return Size; -} - -static unsigned sizeRegModRMByte(){ - return 1; -} - -static unsigned sizeSIBByte(){ - return 1; -} - -static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { - unsigned FinalSize = 0; - // If this is a simple integer displacement that doesn't require a relocation. - if (!RelocOp) { - FinalSize += sizeConstant(4); - return FinalSize; - } - - // Otherwise, this is something that requires a relocation. - if (RelocOp->isGlobal()) { - FinalSize += sizeGlobalAddress(false); - } else if (RelocOp->isCPI()) { - FinalSize += sizeConstPoolAddress(false); - } else if (RelocOp->isJTI()) { - FinalSize += sizeJumpTableAddress(false); - } else { - llvm_unreachable("Unknown value to relocate!"); - } - return FinalSize; -} - -static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, - bool IsPIC, bool Is64BitMode) { - const MachineOperand &Op3 = MI.getOperand(Op+3); - int DispVal = 0; - const MachineOperand *DispForReloc = 0; - unsigned FinalSize = 0; - - // Figure out what sort of displacement we have to handle here. - if (Op3.isGlobal()) { - DispForReloc = &Op3; - } else if (Op3.isCPI()) { - if (Is64BitMode || IsPIC) { - DispForReloc = &Op3; - } else { - DispVal = 1; - } - } else if (Op3.isJTI()) { - if (Is64BitMode || IsPIC) { - DispForReloc = &Op3; - } else { - DispVal = 1; - } - } else { - DispVal = 1; - } - - const MachineOperand &Base = MI.getOperand(Op); - const MachineOperand &IndexReg = MI.getOperand(Op+2); - - unsigned BaseReg = Base.getReg(); - - // Is a SIB byte needed? - if ((!Is64BitMode || DispForReloc || BaseReg != 0) && - IndexReg.getReg() == 0 && - (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { - if (BaseReg == 0) { // Just a displacement? - // Emit special case [disp32] encoding - ++FinalSize; - FinalSize += getDisplacementFieldSize(DispForReloc); - } else { - unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); - if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { - // Emit simple indirect register encoding... [EAX] f.e. 
- ++FinalSize; - // Be pessimistic and assume it's a disp32, not a disp8 - } else { - // Emit the most general non-SIB encoding: [REG+disp32] - ++FinalSize; - FinalSize += getDisplacementFieldSize(DispForReloc); - } - } - - } else { // We need a SIB byte, so start by outputting the ModR/M byte first - assert(IndexReg.getReg() != X86::ESP && - IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - - bool ForceDisp32 = false; - if (BaseReg == 0 || DispForReloc) { - // Emit the normal disp32 encoding. - ++FinalSize; - ForceDisp32 = true; - } else { - ++FinalSize; - } - - FinalSize += sizeSIBByte(); - - // Do we need to output a displacement? - if (DispVal != 0 || ForceDisp32) { - FinalSize += getDisplacementFieldSize(DispForReloc); - } - } - return FinalSize; -} - - -static unsigned GetInstSizeWithDesc(const MachineInstr &MI, - const TargetInstrDesc *Desc, - bool IsPIC, bool Is64BitMode) { - - unsigned Opcode = Desc->Opcode; - unsigned FinalSize = 0; - - // Emit the lock opcode prefix as needed. - if (Desc->TSFlags & X86II::LOCK) ++FinalSize; - - // Emit segment override opcode prefix as needed. - switch (Desc->TSFlags & X86II::SegOvrMask) { - case X86II::FS: - case X86II::GS: - ++FinalSize; - break; - default: llvm_unreachable("Invalid segment!"); - case 0: break; // No segment override! - } - - // Emit the repeat opcode prefix as needed. - if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; - - // Emit the operand size opcode prefix as needed. - if (Desc->TSFlags & X86II::OpSize) ++FinalSize; - - // Emit the address size opcode prefix as needed. - if (Desc->TSFlags & X86II::AdSize) ++FinalSize; - - bool Need0FPrefix = false; - switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::TB: // Two-byte opcode prefix - case X86II::T8: // 0F 38 - case X86II::TA: // 0F 3A - Need0FPrefix = true; - break; - case X86II::TF: // F2 0F 38 - ++FinalSize; - Need0FPrefix = true; - break; - case X86II::REP: break; // already handled. - case X86II::XS: // F3 0F - ++FinalSize; - Need0FPrefix = true; - break; - case X86II::XD: // F2 0F - ++FinalSize; - Need0FPrefix = true; - break; - case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: - case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: - ++FinalSize; - break; // Two-byte opcode prefix - default: llvm_unreachable("Invalid prefix!"); - case 0: break; // No prefix! - } - - if (Is64BitMode) { - // REX prefix - unsigned REX = X86InstrInfo::determineREX(MI); - if (REX) - ++FinalSize; - } - - // 0x0F escape code must be emitted just before the opcode. - if (Need0FPrefix) - ++FinalSize; - - switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::T8: // 0F 38 - ++FinalSize; - break; - case X86II::TA: // 0F 3A - ++FinalSize; - break; - case X86II::TF: // F2 0F 38 - ++FinalSize; - break; - } - - // If this is a two-address instruction, skip one of the register operands. - unsigned NumOps = Desc->getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) - CurOp++; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - switch (Desc->TSFlags & X86II::FormMask) { - default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); - case X86II::Pseudo: - // Remember the current PC offset, this is the PIC relocation - // base address. 
- switch (Opcode) { - default: - break; - case TargetOpcode::INLINEASM: { - const MachineFunction *MF = MI.getParent()->getParent(); - const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); - FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), - *MF->getTarget().getMCAsmInfo()); - break; - } - case TargetOpcode::DBG_LABEL: - case TargetOpcode::EH_LABEL: - case TargetOpcode::DBG_VALUE: - break; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case X86::FP_REG_KILL: - break; - case X86::MOVPC32r: { - // This emits the "call" portion of this pseudo instruction. - ++FinalSize; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - break; - } - } - CurOp = NumOps; - break; - case X86II::RawFrm: - ++FinalSize; - - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - if (MO.isMBB()) { - FinalSize += sizePCRelativeBlockAddress(); - } else if (MO.isGlobal()) { - FinalSize += sizeGlobalAddress(false); - } else if (MO.isSymbol()) { - FinalSize += sizeExternalSymbolAddress(false); - } else if (MO.isImm()) { - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } else { - llvm_unreachable("Unknown RawFrm operand!"); - } - } - break; - - case X86II::AddRegFrm: - ++FinalSize; - ++CurOp; - - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); - if (MO1.isImm()) - FinalSize += sizeConstant(Size); - else { - bool dword = false; - if (Opcode == X86::MOV64ri) - dword = true; - if (MO1.isGlobal()) { - FinalSize += sizeGlobalAddress(dword); - } else if (MO1.isSymbol()) - FinalSize += sizeExternalSymbolAddress(dword); - else if (MO1.isCPI()) - FinalSize += sizeConstPoolAddress(dword); - else if (MO1.isJTI()) - FinalSize += sizeJumpTableAddress(dword); - } - } - break; - - case X86II::MRMDestReg: { - ++FinalSize; - FinalSize += sizeRegModRMByte(); - CurOp += 2; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - } - case X86II::MRMDestMem: { - ++FinalSize; - FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86::AddrNumOperands + 1; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - } - - case X86II::MRMSrcReg: - ++FinalSize; - FinalSize += sizeRegModRMByte(); - CurOp += 2; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - - case X86II::MRMSrcMem: { - ++FinalSize; - FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); - CurOp += X86::AddrNumOperands + 1; - if (CurOp != NumOps) { - ++CurOp; - FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); - } - break; - } - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - ++FinalSize; - if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE) { - // Special handling of lfence and mfence; - FinalSize += sizeRegModRMByte(); - } else if (Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - // Special handling of monitor and mwait. - FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode. 
- } else { - ++CurOp; - FinalSize += sizeRegModRMByte(); - } - - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); - if (MO1.isImm()) - FinalSize += sizeConstant(Size); - else { - bool dword = false; - if (Opcode == X86::MOV64ri32) - dword = true; - if (MO1.isGlobal()) { - FinalSize += sizeGlobalAddress(dword); - } else if (MO1.isSymbol()) - FinalSize += sizeExternalSymbolAddress(dword); - else if (MO1.isCPI()) - FinalSize += sizeConstPoolAddress(dword); - else if (MO1.isJTI()) - FinalSize += sizeJumpTableAddress(dword); - } - } - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: { - - ++FinalSize; - FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86::AddrNumOperands; - - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); - if (MO.isImm()) - FinalSize += sizeConstant(Size); - else { - bool dword = false; - if (Opcode == X86::MOV64mi32) - dword = true; - if (MO.isGlobal()) { - FinalSize += sizeGlobalAddress(dword); - } else if (MO.isSymbol()) - FinalSize += sizeExternalSymbolAddress(dword); - else if (MO.isCPI()) - FinalSize += sizeConstPoolAddress(dword); - else if (MO.isJTI()) - FinalSize += sizeJumpTableAddress(dword); - } - } - break; - - case X86II::MRM_C1: - case X86II::MRM_C8: - case X86II::MRM_C9: - case X86II::MRM_E8: - case X86II::MRM_F0: - FinalSize += 2; - break; - } - - case X86II::MRMInitReg: - ++FinalSize; - // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). - FinalSize += sizeRegModRMByte(); - ++CurOp; - break; - } - - if (!Desc->isVariadic() && CurOp != NumOps) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Cannot determine size: " << MI; - report_fatal_error(Msg.str()); - } - - - return FinalSize; -} - - -unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const TargetInstrDesc &Desc = MI->getDesc(); - bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; - bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); - unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); - if (Desc.getOpcode() == X86::MOVPC32r) - Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); - return Size; -} - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. @@ -3573,7 +3025,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // that we don't include here. We don't want to replace instructions selected // by intrinsics. 
static const unsigned ReplaceableInstrs[][3] = { - //PackedInt PackedSingle PackedDouble + //PackedSingle PackedDouble PackedInt { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, @@ -3589,6 +3041,22 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, + // AVX 128-bit support + { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, + { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, + { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, + { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, + { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, + { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, + { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, + { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, + { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, + { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, + { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, + { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, + { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, + { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, + { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different @@ -3627,7 +3095,7 @@ namespace { /// global base register for x86-32. struct CGBR : public MachineFunctionPass { static char ID; - CGBR() : MachineFunctionPass(&ID) {} + CGBR() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF) { const X86TargetMachine *TM = diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index ad0217adb4758..f33620641e88c 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,12 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + + /// RawFrmImm16 - This is used for CALL FAR instructions, which have two + /// immediates, the first of which is a 16 or 32-bit immediate (specified by + /// the imm encoding) and the second is a 16-bit fixed value. In the AMD + /// manual, this operand is described as pntr16:32 and pntr16:16 + RawFrmImm16 = 43, FormMask = 63, @@ -439,27 +445,27 @@ namespace X86II { //===------------------------------------------------------------------===// // VEX - The opcode prefix used by AVX instructions - VEX = 1ULL << 32, + VEX = 1U << 0, // VEX_W - Has a opcode specific functionality, but is used in the same // way as REX_W is for regular SSE instructions. - VEX_W = 1ULL << 33, + VEX_W = 1U << 1, // VEX_4V - Used to specify an additional AVX/SSE register. Several 2 // address instructions in SSE are represented as 3 address ones in AVX // and the additional register is encoded in VEX_VVVV prefix. - VEX_4V = 1ULL << 34, + VEX_4V = 1U << 2, // VEX_I8IMM - Specifies that the last register used in a AVX instruction, // must be encoded in the i8 immediate field. This usually happens in // instructions with 4 operands. - VEX_I8IMM = 1ULL << 35, + VEX_I8IMM = 1U << 3, // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current // instruction uses 256-bit wide registers. This is usually auto detected if // a VR256 register is used, but some AVX instructions also have this field // marked when using a f256 memory references. 
- VEX_L = 1ULL << 36 + VEX_L = 1U << 4 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the @@ -522,11 +528,12 @@ namespace X86II { case X86II::AddRegFrm: case X86II::MRMDestReg: case X86II::MRMSrcReg: + case X86II::RawFrmImm16: return -1; case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: { - bool HasVEX_4V = TSFlags & X86II::VEX_4V; + bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). @@ -610,12 +617,6 @@ public: /// virtual const X86RegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" /// extension instruction. That is, it's like a copy where it's legal for the /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns @@ -826,16 +827,11 @@ public: if (!MO.isReg()) return false; return isX86_64ExtendedReg(MO.getReg()); } - static unsigned determineREX(const MachineInstr &MI); /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or /// higher) register? e.g. r8, xmm8, xmm13, etc. static bool isX86_64ExtendedReg(unsigned RegNo); - /// GetInstSize - Returns the size of the specified MachineInstr. - /// - virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; - /// getGlobalBaseReg - Return a virtual register initialized with the /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1efef5a80b1ba..09b7721a621d3 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; +def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; +def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + +def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, + [SDNPHasChain]>; +def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE, + [SDNPHasChain]>; +def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, + [SDNPHasChain]>; +def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, + [SDNPHasChain]>; +def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER, + [SDNPHasChain]>; + + def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>; def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>; def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; @@ -222,7 +237,7 @@ def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; -//def i256mem : X86MemOperand<"printi256mem">; +def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; @@ -333,15 +348,21 @@ def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", // X86 Instruction Predicate Definitions. 
def HasCMov    : Predicate<"Subtarget->hasCMov()">;
def NoCMov     : Predicate<"!Subtarget->hasCMov()">;
-def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+// FIXME: temporary hack to let codegen assert or generate poor code in case
+// no AVX version of the desired instructions is present; this is better for
+// incremental dev (without fallbacks it's easier to spot what's missing)
+def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
@@ -393,9 +414,7 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;

-def immSext8 : PatLeaf<(imm), [{
-  return N->getSExtValue() == (int8_t)N->getSExtValue();
-}]>;
+def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;

def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
@@ -559,9 +578,10 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,

// The main point of having separate instructions is the extra unmodelled
// effects (compared to ordinary calls), like the stack pointer change.
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), - "# dynamic stack allocation", - [(X86MingwAlloca)]>; +let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in + def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), + "# dynamic stack allocation", + [(X86MingwAlloca)]>; } // Nop @@ -574,10 +594,14 @@ let neverHasSideEffects = 1 in { } // Trap -def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>; +let Uses = [EFLAGS] in { + def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +} +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", + [(int_x86_int (i8 3))]>; // FIXME: need to make sure that "int $3" matches int3 -def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", + [(int_x86_int imm:$trap)]>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; @@ -650,16 +674,16 @@ let Uses = [ECX], isBranch = 1, isTerminator = 1 in // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)]>; + [(brind GR32:$dst)]>, Requires<[In32BitMode]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))]>; + [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; - def FARJMP16i : Iseg16<0xEA, RawFrm, (outs), - (ins i16imm:$seg, i16imm:$off), - "ljmp{w}\t$seg, $off", []>, OpSize; - def FARJMP32i : Iseg32<0xEA, RawFrm, (outs), - (ins i16imm:$seg, i32imm:$off), - "ljmp{l}\t$seg, $off", []>; + def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), + (ins i32imm:$off, i16imm:$seg), + "ljmp{l}\t{$seg, $off|$off, $seg}", []>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), "ljmp{w}\t{*}$dst", []>, OpSize; @@ -670,9 +694,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // Loop instructions -def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; -def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; -def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; +def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; +def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; +def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; //===----------------------------------------------------------------------===// // Call Instructions... 
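The ljmp rework above (and the lcall rework in the next hunk) reorders the ins list to match the hardware encoding that the new RawFrmImm16 form models: opcode, then the offset immediate, then a trailing fixed 16-bit segment selector. A standalone sketch of the ptr16:32 byte layout; this is illustrative only, not LLVM's emitter, and encodeFarJmp32 is a made-up helper:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Raw encoding of "ljmp $seg, $off" (ptr16:32 form, 32-bit mode):
    // opcode 0xEA, then the 32-bit offset, then the 16-bit selector.
    static std::vector<uint8_t> encodeFarJmp32(uint32_t Off, uint16_t Seg) {
      std::vector<uint8_t> Bytes;
      Bytes.push_back(0xEA);                       // JMP ptr16:32 opcode
      for (int i = 0; i < 4; ++i)                  // imm32 offset, little-endian
        Bytes.push_back(uint8_t(Off >> (8 * i)));
      for (int i = 0; i < 2; ++i)                  // trailing fixed imm16 segment
        Bytes.push_back(uint8_t(Seg >> (8 * i)));
      return Bytes;
    }

    int main() {
      for (uint8_t B : encodeFarJmp32(0x11223344, 0x0008))
        std::printf("%02x ", B);                   // ea 44 33 22 11 08 00
      std::printf("\n");
      return 0;
    }

The offset bytes come before the selector bytes, which is why the TableGen operand lists now put $off before $seg.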
@@ -695,12 +719,12 @@ let isCall = 1 in def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; - def FARCALL16i : Iseg16<0x9A, RawFrm, (outs), - (ins i16imm:$seg, i16imm:$off), - "lcall{w}\t$seg, $off", []>, OpSize; - def FARCALL32i : Iseg32<0x9A, RawFrm, (outs), - (ins i16imm:$seg, i32imm:$off), - "lcall{l}\t$seg, $off", []>; + def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), + (ins i16imm:$off, i16imm:$seg), + "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), + (ins i32imm:$off, i16imm:$seg), + "lcall{l}\t{$seg, $off|$off, $seg}", []>; def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), "lcall{w}\t{*}$dst", []>, OpSize; @@ -721,7 +745,8 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl), // Tail call stuff. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, @@ -756,7 +781,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in // let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, - (outs), (ins), "leave", []>; + (outs), (ins), "leave", []>, Requires<[In32BitMode]>; def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; @@ -934,7 +959,7 @@ def SYSRET : I<0x07, RawFrm, def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; def SYSEXIT : I<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB; + (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>; def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; @@ -1025,17 +1050,23 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{b}\t{$src, %al|%al, $src}", []>; + "mov{b}\t{$src, %al|%al, $src}", []>, + Requires<[In32BitMode]>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), - "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize, + Requires<[In32BitMode]>; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), - "mov{l}\t{$src, %eax|%eax, $src}", []>; + "mov{l}\t{$src, %eax|%eax, $src}", []>, + Requires<[In32BitMode]>; def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{b}\t{%al, $dst|$dst, %al}", []>; + "mov{b}\t{%al, $dst|$dst, %al}", []>, + Requires<[In32BitMode]>; def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), - "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; + "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize, + Requires<[In32BitMode]>; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), - "mov{l}\t{%eax, $dst|$dst, %eax}", []>; + "mov{l}\t{%eax, $dst|$dst, %eax}", []>, + Requires<[In32BitMode]>; // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), @@ -1087,6 +1118,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), [(store GR32:$src, addr:$dst)]>; /// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC. 
+let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; @@ -1101,10 +1133,12 @@ let mayStore = 1 in def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +} // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be // encoded when a REX prefix is present. +let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), @@ -1118,6 +1152,7 @@ let mayLoad = 1, def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; +} // Moves to and from debug registers def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), @@ -1137,7 +1172,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), // Extra precision multiplication -// AL is really implied by AX, by the registers in Defs must match the +// AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", @@ -3895,6 +3930,20 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), // Atomic support // +// Memory barriers + +// TODO: Get this to fold the constant into the instruction. +def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), + "lock\n\t" + "or{l}\t{$zero, $dst|$dst, $zero}", + []>, Requires<[In32BitMode]>, LOCK; + +let hasSideEffects = 1 in { +def Int_MemBarrier : I<0, Pseudo, (outs), (ins), + "#MEMBARRIER", + [(X86MemBarrier)]>, Requires<[HasSSE2]>; +} + // Atomic swap. These are just normal xchg instructions. But since a memory // operand is referenced, the atomicity is ensured. 
let Constraints = "$val = $dst" in { @@ -4928,6 +4977,12 @@ include "X86Instr64bit.td" include "X86InstrFragmentsSIMD.td" //===----------------------------------------------------------------------===// +// FMA - Fused Multiply-Add support (requires FMA) +//===----------------------------------------------------------------------===// + +include "X86InstrFMA.td" + +//===----------------------------------------------------------------------===// // XMM Floating point support (requires SSE / SSE2) //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 6cf7ac83620e8..11d4179534dce 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -164,7 +164,7 @@ let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), +def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ebe161b46bdcb..f5466f83f5192 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -142,7 +142,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", !strconcat(SSEVer, !strconcat("_", !strconcat(OpcodeStr, FPSizeStr)))) RC:$src1, RC:$src2))], d>; @@ -150,7 +150,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", !strconcat(SSEVer, !strconcat("_", !strconcat(OpcodeStr, FPSizeStr)))) RC:$src1, (mem_frag addr:$src2)))], d>; @@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), let isAsmParserOnly = 1 in { def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, XS, VEX_4V; + [(store FR32:$src, addr:$dst)]>, XS, VEX; def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, XD, VEX_4V; + [(store FR64:$src, addr:$dst)]>, XD, VEX; } // Extract and store. 
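For the memory-barrier definitions added earlier in this file (OR32mrLocked and the SSE2-gated Int_MemBarrier pseudo), the two hardware idioms involved are mfence, which requires SSE2, and a locked read-modify-write of the stack slot at %esp, the classic full-barrier fallback on pre-SSE2 IA-32. A minimal sketch, assuming GCC-style inline asm on i386; the immediate-zero form is shown for brevity, while the TableGen def currently keeps the zero in a register, per its TODO:

    // i386-only sketch of the two fence strategies.
    static inline void fence_mfence() {
      __asm__ __volatile__("mfence" ::: "memory");          // needs SSE2
    }

    static inline void fence_locked_or() {
      // A locked no-op RMW on the top of the stack serializes memory
      // like a full barrier on IA-32, with no SSE2 requirement.
      __asm__ __volatile__("lock; orl $0, (%%esp)" ::: "memory", "cc");
    }

    int main() {
      fence_mfence();
      fence_locked_or();
      return 0;
    }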
@@ -340,6 +349,15 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                    "movupd\t{$src, $dst|$dst, $src}",
                    [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
}
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+          (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+          (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -516,6 +525,14 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
}

+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                          X86MemOperand x86memop, string asm> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+              []>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+              []>;
+}
+
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                         SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                         string asm, Domain d> {
@@ -526,35 +543,58 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}

multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                          SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                          string asm> {
+                          X86MemOperand x86memop, string asm> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
-              asm, []>;
+              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
-              (ins DstRC:$src1, x86memop:$src), asm, []>;
+              (ins DstRC:$src1, x86memop:$src),
+              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}

let isAsmParserOnly = 1 in {
-defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
-                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
-                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
-                      VEX_4V;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
-                      VEX_4V;
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+                      VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+                      VEX, VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing an rxx
+// register, but the same isn't true when only memory operands are used, so
+// provide separate assembly "l" and "q" forms to make the operand size
+// explicit where appropriate.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS, + VEX_4V; +defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, + VEX_4V, VEX_W; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, + VEX_4V; +defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, + VEX_4V; +defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, + VEX_4V, VEX_W; } defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; +defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, + "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; +defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, + "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; +defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, + "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD; +defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, + "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). @@ -570,10 +610,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm> { - def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))]>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (Int SrcRC:$src))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -588,35 +630,79 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, - PatFrag ld_frag, string asm> { + PatFrag ld_frag, string asm, bit Is2Addr = 1> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), - asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>; + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), - (ins DstRC:$src1, x86memop:$src2), asm, + (ins DstRC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } let isAsmParserOnly = 1 in { defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS, - 
VEX;
+                      f32mem, load, "cvtss2si">, XS, VEX;
+  defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+                        int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+                        XS, VEX, VEX_W;
  defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
-                      f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
-                      VEX;
+                      f128mem, load, "cvtsd2si">, XD, VEX;
+  defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+                        int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+                        XD, VEX, VEX_W;
+
+  // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+  // Get rid of this hack or rename the intrinsics; there are several
+  // instructions that only match with the intrinsic form, so why create
+  // duplicates just to let them be recognized by the assembler?
+  defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+                        "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+  defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+                        "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
}

defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
-                      f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+                      f32mem, load, "cvtss2si">, XS;
+defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+                      f32mem, load, "cvtss2si{q}">, XS, REX_W;
defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
-                      f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
+                      f128mem, load, "cvtsd2si">, XD;
+defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+                      f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
+                  REX_W;
+
+let isAsmParserOnly = 1 in {
+  defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+  defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+          VEX_W;
+  defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+  defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+          VEX_4V, VEX_W;
+}

let Constraints = "$src1 = $dst" in {
  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
-                        "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+                        "cvtsi2ss">, XS;
+  defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+                        int_x86_sse_cvtsi642ss, i64mem, loadi64,
+                        "cvtsi2ss{q}">, XS, REX_W;
  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
-                        "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
+                        "cvtsi2sd">, XD;
+  defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+                        int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+                        "cvtsi2sd">, XD, REX_W;
}

// Instructions below don't have an AVX form.
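At the C level, the new 64-bit converter defs correspond roughly to the SSE scalar-conversion intrinsics that are only available on x86-64, where the REX.W-prefixed cvtsi2ss{q} and cvttss2si{q} forms are selected. A small sketch, assuming an x86-64 compiler with <xmmintrin.h>:

    #include <cstdint>
    #include <cstdio>
    #include <xmmintrin.h>   // _mm_cvtsi64_ss / _mm_cvttss_si64, x86-64 only

    int main() {
      int64_t Big = INT64_C(1) << 40;                     // too wide for a GR32
      __m128 V = _mm_cvtsi64_ss(_mm_setzero_ps(), Big);   // cvtsi2ssq
      int64_t Back = _mm_cvttss_si64(V);                  // cvttss2siq (truncating)
      std::printf("%lld -> %f -> %lld\n", (long long)Big,
                  (double)_mm_cvtss_f32(V), (long long)Back);
      return 0;
    }

The register forms are self-describing because a 64-bit GPR appears in the asm, but the memory forms need the explicit {q} suffix, which is what the comment in the earlier hunk is about.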
@@ -645,35 +731,48 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only

// Aliases for intrinsics
-let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
-                int_x86_sse_cvttss2si, f32mem, load,
-                "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
-defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
-                int_x86_sse2_cvttsd2si, f128mem, load,
-                "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
+let isAsmParserOnly = 1 in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+                                      f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+                                      int_x86_sse_cvttss2si64, f32mem, load,
+                                      "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+                                      f128mem, load, "cvttss2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+                                      int_x86_sse2_cvttsd2si64, f128mem, load,
+                                      "cvttss2si">, XD, VEX, VEX_W;
}

defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
-                      f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
-                      XS;
+                      f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+                      int_x86_sse_cvttss2si64, f32mem, load,
+                      "cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                      f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
-                      XD;
+                      f128mem, load, "cvttss2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+                      int_x86_sse2_cvttsd2si64, f128mem, load,
+                      "cvttss2si{q}">, XD, REX_W;

let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
-                      "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
-                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
-                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB, VEX;
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+                      "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+                      "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+                      VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
                          "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+                          "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
@@ -701,13 +800,11 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),

let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
-                    int_x86_sse2_cvtsd2ss, f64mem, load,
-                    "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
-                    XS, VEX_4V;
+                    int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+                    XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm
Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, - "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS; + int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS; // Convert scalar single to scalar double let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix @@ -806,6 +903,7 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (bitconvert (memopv2i64 addr:$src))))]>, XS, Requires<[HasSSE2]>; + // Convert packed single/double fp to doubleword let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -964,11 +1062,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), let isAsmParserOnly = 1 in { def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", + "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, VEX, Requires<[HasAVX]>; def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", + "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd (load addr:$src)))]>, VEX, Requires<[HasAVX]>; @@ -1029,6 +1127,39 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memop addr:$src)))]>; +// AVX 256-bit register conversion intrinsics +// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below +// whenever possible to avoid declaring two versions of each one. +def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), + (VCVTDQ2PSYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)), + (VCVTDQ2PSYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), + (VCVTPD2PSYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), + (VCVTPD2PSYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), + (VCVTPS2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), + (VCVTPS2DQYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), + (VCVTPS2PDYrr VR128:$src)>; +def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), + (VCVTPS2PDYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), + (VCVTTPD2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTTPD2DQYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src), + (VCVTTPS2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), + (VCVTTPS2DQYrm addr:$src)>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// @@ -1193,16 +1324,14 @@ let isAsmParserOnly = 1 in { "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", SSEPackedDouble>, OpSize, VEX_4V; - let Pattern = []<dag> in { - defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps, - "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedSingle>, VEX_4V; - defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd, - "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", - "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - } + defm VCMPPSY : 
sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -1232,24 +1361,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, ValueType vt, string asm, PatFrag mem_frag, Domain d, bit IsConvertibleToThreeAddress = 0> { - def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm, - [(set VR128:$dst, (vt (shufp:$src3 - VR128:$src1, (mem_frag addr:$src2))))], d>; + def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm, + [(set RC:$dst, (vt (shufp:$src3 + RC:$src1, (mem_frag addr:$src2))))], d>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in - def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm, - [(set VR128:$dst, - (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>; + def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i8imm:$src3), asm, + [(set RC:$dst, + (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>; } let isAsmParserOnly = 1 in { - defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, - "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - memopv4f32, SSEPackedSingle>, VEX_4V; - defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, - "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", - memopv2f64, SSEPackedDouble>, OpSize, VEX_4V; + defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, + "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + memopv4f32, SSEPackedSingle>, VEX_4V; + defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, + "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + memopv8f32, SSEPackedSingle>, VEX_4V; + defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, + "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", + memopv2f64, SSEPackedDouble>, OpSize, VEX_4V; + defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, + "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}", + memopv4f64, SSEPackedDouble>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { @@ -1351,12 +1486,23 @@ let isAsmParserOnly = 1 in { defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, OpSize, VEX; - // FIXME: merge with multiclass above when the intrinsics come. 
- def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, + "movmskps", SSEPackedSingle>, VEX; + defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, + "movmskpd", SSEPackedDouble>, OpSize, + VEX; + + // Assembler Only + def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; + def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; + def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; - def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; + VEX; } //===----------------------------------------------------------------------===// @@ -1536,6 +1682,9 @@ let isCommutable = 0 in /// /// These three forms can each be reg+reg or reg+mem. /// + +/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those +/// classes below multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, bit Is2Addr = 1> { defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), @@ -1565,7 +1714,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, - bit Is2Addr = 1> { + bit Is2Addr = 1> { defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS; defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, @@ -1573,37 +1722,57 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, } multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr, - bit Is2Addr = 1> { + bit Is2Addr = 1> { defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32, + !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB; defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64, + !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize; } +multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> { + defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256, + !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32, + SSEPackedSingle, 0>, TB; + + defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256, + !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64, + SSEPackedDouble, 0>, TB, OpSize; +} + // Binary Arithmetic instructions let isAsmParserOnly = 1 in { defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_s_int<0x58, "add", 0>, basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_s_int<0x59, "mul", 0>, basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; let isCommutable = 0 in { defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, 
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_s_int<0x5E, "div", 0>, basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_s_int<0x5F, "max", 0>, basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V; + basic_sse12_fp_binop_p_int<0x5F, "max", 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_s_int<0x5D, "min", 0>, basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p_int<0x5D, "min", 0>, + basic_sse12_fp_binop_p_y_int<0x5D, "min">, basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; } } @@ -1668,20 +1837,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int> { def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, XS, Requires<[HasAVX, OptForSize]>; - def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(!strconcat("v", OpcodeStr), - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(!strconcat("v", OpcodeStr), - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, + "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F32Int VR128:$src))]>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), + !strconcat(OpcodeStr, + "ss\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; } /// sse1_fp_unop_p - SSE1 unops in packed form. @@ -1715,6 +1884,16 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; } +/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms. +multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, + Intrinsic V4F32Int> { + def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V4F32Int VR256:$src))]>; + def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>; +} /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -1738,21 +1917,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. 
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int> { - def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; - def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; + def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), + (ins FR64:$src1, f64mem:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F64Int VR128:$src))]>; + def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"), + [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -1787,29 +1964,48 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>; } +/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms. +multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, + Intrinsic V2F64Int> { + def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V2F64Int VR256:$src))]>; + def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>; +} + let isAsmParserOnly = 1, Predicates = [HasAVX] in { // Square root. - defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, - sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>, + sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>, VEX_4V; defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>, sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>, + sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>, + sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>, + sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>, VEX; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. 
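The refinement mentioned in the note above is conventionally a single Newton-Raphson step layered on the hardware estimate. A small C sketch (assuming SSE1 and <xmmintrin.h>; illustrative only, not part of this patch) of how the roughly 12-bit rsqrtps approximation is usually brought close to full single precision:

#include <xmmintrin.h>

/* x = rsqrtps(a) is only an estimate; one Newton-Raphson iteration
   x' = x * (1.5 - 0.5 * a * x * x) roughly doubles the accurate bits. */
static __m128 rsqrt_nr(__m128 a) {
  __m128 x   = _mm_rsqrt_ps(a);                  /* ~12-bit estimate */
  __m128 ax2 = _mm_mul_ps(_mm_mul_ps(a, x), x);  /* a * x * x */
  __m128 t   = _mm_sub_ps(_mm_set1_ps(1.5f),
                          _mm_mul_ps(_mm_set1_ps(0.5f), ax2));
  return _mm_mul_ps(x, t);
}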
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt, + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>, VEX_4V; defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, - sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX; + sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, + sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>, + sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX; - defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>, VEX_4V; defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, - sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX; + sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, + sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>, + sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX; } // Square root. @@ -1898,6 +2094,13 @@ let isAsmParserOnly = 1 in { } } +def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; +def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src), + (VMOVNTPDYmr addr:$dst, VR256:$src)>; +def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src), + (VMOVNTPSYmr addr:$dst, VR256:$src)>; + def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; @@ -1961,11 +2164,14 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>; +def : Pat<(X86SFence), (SFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementation; it does not expand the instructions below like +// X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in { def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; @@ -1977,6 +2183,26 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; } +// The same as above, but for AVX. The 128-bit versions are the +// same, but re-encoded. The 256-bit form does not support a PI version. +// FIXME: Change encoding to pseudo! This is blocked right now by the x86 +// JIT implementation; it does not expand the instructions below like +// X86MCInstLower does. 
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, Predicates = [HasAVX] in { +def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V; +def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V; +def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; +def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; +let ExeDomain = SSEPackedInt in +def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllZerosV))]>; +} + def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; @@ -2003,35 +2229,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions //===---------------------------------------------------------------------===// + let ExeDomain = SSEPackedInt in { // SSE integer instructions let isAsmParserOnly = 1 in { - let neverHasSideEffects = 1 in - def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + let neverHasSideEffects = 1 in { + def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + } + def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; let canFoldAsLoad = 1, mayLoad = 1 in { - def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>, - VEX; - def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, - XS, VEX, Requires<[HasAVX]>; + def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + let Predicates = [HasAVX] in { + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + } } let mayStore = 1 in { - def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX; - def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, - XS, VEX, Requires<[HasAVX]>; + def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, 
VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i256mem:$dst, VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + let Predicates = [HasAVX] in { + def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + } } } @@ -2084,6 +2322,10 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), } // ExeDomain = SSEPackedInt +def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>; +def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), + (VMOVDQUYmr addr:$dst, VR256:$src)>; + //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// @@ -2376,6 +2618,25 @@ let ExeDomain = SSEPackedInt in { } } // Constraints = "$src1 = $dst" +let Predicates = [HasAVX] in { + def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), + (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), + (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), + (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>; + def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), + (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>; + def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), + (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + + // Shift up / down and insert zeros. + def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), + (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), + (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; +} + let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; @@ -2662,11 +2923,16 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, imm:$src2))]>; // Insert -let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm PINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; + def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), + "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, OpSize, VEX_4V; +} let Constraints = "$src1 = $dst" in - defm VPINSRW : sse2_pinsrw, TB, OpSize; + defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>; } // ExeDomain = SSEPackedInt @@ -2676,10 +2942,13 @@ let Constraints = "$src1 = $dst" in let ExeDomain = SSEPackedInt in { -let isAsmParserOnly = 1 in -def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), +let isAsmParserOnly = 1 in { +def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; +def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; +} def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; @@ -2939,18 +3208,20 @@ def : Pat<(v2i64 (X86vzmovl 
(bc_v2i64 (loadv4i32 addr:$src)))), // Instructions to match in the assembler let isAsmParserOnly = 1 in { -// This instructions is in fact an alias to movd with 64 bit dst def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; +// Recognize "movd" with GR64 destination, but encode as a "movq" +def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; } // Instructions for the disassembler // xr = XMM register // xm = mem64 -let isAsmParserOnly = 1 in +let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2970,19 +3241,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +def : Pat<(X86LFence), (LFENCE)>; +def : Pat<(X86MFence), (MFENCE)>; + // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; -//TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 0)), (NOOP)>; -def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; -def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 1)), (MFENCE)>; - // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. 
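One aside on the zero and all-ones aliases just described: at the C level these pseudos correspond to the standard idioms below (a hedged sketch assuming <emmintrin.h>; with optimization enabled, compilers conventionally emit pxor for the zero and pcmpeqd for the all-ones instead of a constant-pool load):

#include <emmintrin.h>

/* All-zeros: conventionally materialized as pxor xmm, xmm. */
static __m128i all_zeros(void) { return _mm_setzero_si128(); }

/* All-ones: x == x is true in every lane, i.e. pcmpeqd xmm, xmm. */
static __m128i all_ones(void) {
  __m128i x = _mm_setzero_si128();
  return _mm_cmpeq_epi32(x, x);
}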
@@ -3027,13 +3293,13 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // Convert Packed DW Integers to Packed Double FP let isAsmParserOnly = 1, Predicates = [HasAVX] in { def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -3041,6 +3307,17 @@ def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +// AVX 256-bit register conversion intrinsics +def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), + (VCVTDQ2PDYrr VR128:$src)>; +def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)), + (VCVTDQ2PDYrm addr:$src)>; + +def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), + (VCVTPD2DQYrr VR256:$src)>; +def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), + (VCVTPD2DQYrm addr:$src)>; + //===---------------------------------------------------------------------===// // SSE3 - Move Instructions //===---------------------------------------------------------------------===// @@ -3057,9 +3334,20 @@ def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memopv4f32 addr:$src), (undef)))]>; } +multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag, + string OpcodeStr> { +def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; +def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>; +} + let isAsmParserOnly = 1, Predicates = [HasAVX] in { -defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; -defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; + // FIXME: Merge above classes when we have patterns for the ymm version + defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; + defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; + defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX; + defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX; } defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; @@ -3076,15 +3364,31 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), (undef))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; +multiclass sse3_replicate_dfp_y<string OpcodeStr> { +def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] 
in { + // FIXME: Merge above classes when we have patterns for the ymm version + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; +} defm MOVDDUP : sse3_replicate_dfp<"movddup">; // Move Unaligned Integer -let isAsmParserOnly = 1 in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vlddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; + def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX; +} def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; @@ -3125,35 +3429,39 @@ let AddedComplexity = 20 in // SSE3 - Arithmetic //===---------------------------------------------------------------------===// -multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> { +multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, bit Is2Addr = 1> { def rr : I<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (Int VR128:$src1, - VR128:$src2))]>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))]>; def rm : I<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (Int VR128:$src1, - (memop addr:$src2)))]>; - + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; } let isAsmParserOnly = 1, Predicates = [HasAVX], ExeDomain = SSEPackedDouble in { - defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD, - VEX_4V; - defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize, - VEX_4V; + defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, + f128mem, 0>, XD, VEX_4V; + defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, + f128mem, 0>, OpSize, VEX_4V; + defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, + f256mem, 0>, XD, VEX_4V; + defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, + f256mem, 0>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst", Predicates = [HasSSE3], ExeDomain = SSEPackedDouble in { - defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD; - defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize; + defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, + f128mem>, XD; + defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, + f128mem>, TB, OpSize; } //===---------------------------------------------------------------------===// @@ -3161,61 +3469,72 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3], //===---------------------------------------------------------------------===// // Horizontal ops -class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3DI<o, MRMSrcReg, (outs 
VR128:$dst), (ins VR128:$src1, VR128:$src2), +multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, + X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>; -class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + + def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>; -class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; +} +multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, + X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> { + def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>; -class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> - : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>; + + def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>; + [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; +} let isAsmParserOnly = 1, Predicates = [HasAVX] in { - def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; - def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; - def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; - def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; - def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; - def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; - def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; - def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; + defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, + int_x86_sse3_hadd_ps, 0>, VEX_4V; + defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, + int_x86_sse3_hadd_pd, 0>, VEX_4V; + defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, + int_x86_sse3_hsub_ps, 0>, VEX_4V; + defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, + int_x86_sse3_hsub_pd, 0>, VEX_4V; + defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, + int_x86_avx_hadd_ps_256, 0>, VEX_4V; + defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, + int_x86_avx_hadd_pd_256, 0>, VEX_4V; + defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, + int_x86_avx_hsub_ps_256, 0>, 
VEX_4V; + defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, + int_x86_avx_hsub_pd_256, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { - def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; - def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; - def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>; - def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>; - def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>; - def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>; - def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; - def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; + defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, + int_x86_sse3_hadd_ps>; + defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, + int_x86_sse3_hadd_pd>; + defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, + int_x86_sse3_hsub_ps>; + defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, + int_x86_sse3_hsub_pd>; } //===---------------------------------------------------------------------===// // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. -multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, PatFrag mem_frag128, - Intrinsic IntId64, Intrinsic IntId128> { +/// SS3I_unop_rm_int_mm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. +multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, Intrinsic IntId64> { def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))]>; @@ -3224,7 +3543,11 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; +} +/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, + PatFrag mem_frag128, Intrinsic IntId128> { def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -3240,26 +3563,28 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, } let isAsmParserOnly = 1, Predicates = [HasAVX] in { - defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8, - int_x86_ssse3_pabs_b, + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, int_x86_ssse3_pabs_b_128>, VEX; - defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16, - int_x86_ssse3_pabs_w, + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16, int_x86_ssse3_pabs_w_128>, VEX; - defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32, - int_x86_ssse3_pabs_d, + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32, int_x86_ssse3_pabs_d_128>, VEX; } -defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8, - int_x86_ssse3_pabs_b, - int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16, - int_x86_ssse3_pabs_w, - int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, - int_x86_ssse3_pabs_d, - int_x86_ssse3_pabs_d_128>; +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, + int_x86_ssse3_pabs_b_128>, + SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8, + int_x86_ssse3_pabs_b>; + +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, + int_x86_ssse3_pabs_w_128>, + SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16, + int_x86_ssse3_pabs_w>; + +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, + int_x86_ssse3_pabs_d_128>, + SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32, + int_x86_ssse3_pabs_d>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -3267,26 +3592,9 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, PatFrag mem_frag128, - Intrinsic IntId64, Intrinsic IntId128, + PatFrag mem_frag128, Intrinsic IntId128, bit Is2Addr = 1> { let isCommutable = 1 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv8i8 addr:$src2))))]>; - - let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !if(Is2Addr, @@ -3303,88 +3611,102 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (IntId128 VR128:$src1, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } +multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, Intrinsic IntId64> { + let isCommutable = 1 in + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, + (IntId64 VR64:$src1, + (bitconvert (memopv8i8 addr:$src2))))]>; +} let isAsmParserOnly = 1, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_w, + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32, - int_x86_ssse3_phadd_d, + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32, int_x86_ssse3_phadd_d_128, 0>, VEX_4V; - defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_sw, + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16, int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_w, + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16, int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32, - int_x86_ssse3_phsub_d, + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32, int_x86_ssse3_phsub_d_128, 0>, VEX_4V; - defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_sw, + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16, int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8, - int_x86_ssse3_pmadd_ub_sw, + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8, int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8, - int_x86_ssse3_pshuf_b, + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8, int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8, - int_x86_ssse3_psign_b, + defm VPSIGNB : SS3I_binop_rm_int<0x08, 
"vpsignb", memopv16i8, int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16, - int_x86_ssse3_psign_w, + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16, int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32, - int_x86_ssse3_psign_d, + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32, int_x86_ssse3_psign_d_128, 0>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16, - int_x86_ssse3_pmul_hr_sw, +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_w, - int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32, - int_x86_ssse3_phadd_d, - int_x86_ssse3_phadd_d_128>; - defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16, - int_x86_ssse3_phadd_sw, - int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_w, - int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32, - int_x86_ssse3_phsub_d, - int_x86_ssse3_phsub_d_128>; - defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16, - int_x86_ssse3_phsub_sw, - int_x86_ssse3_phsub_sw_128>; - defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8, - int_x86_ssse3_pmadd_ub_sw, - int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8, - int_x86_ssse3_pshuf_b, - int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8, - int_x86_ssse3_psign_b, - int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16, - int_x86_ssse3_psign_w, - int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32, - int_x86_ssse3_psign_d, - int_x86_ssse3_psign_d_128>; -} -defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16, - int_x86_ssse3_pmul_hr_sw, - int_x86_ssse3_pmul_hr_sw_128>; + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, + int_x86_ssse3_phadd_w_128>, + SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16, + int_x86_ssse3_phadd_w>; + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, + int_x86_ssse3_phadd_d_128>, + SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32, + int_x86_ssse3_phadd_d>; + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, + int_x86_ssse3_phadd_sw_128>, + SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16, + int_x86_ssse3_phadd_sw>; + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, + int_x86_ssse3_phsub_w_128>, + SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16, + int_x86_ssse3_phsub_w>; + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, + int_x86_ssse3_phsub_d_128>, + SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32, + int_x86_ssse3_phsub_d>; + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, + int_x86_ssse3_phsub_sw_128>, + SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16, + int_x86_ssse3_phsub_sw>; + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, + int_x86_ssse3_pmadd_ub_sw_128>, + SS3I_binop_rm_int_mm<0x04, 
"pmaddubsw", memopv8i8, + int_x86_ssse3_pmadd_ub_sw>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, + int_x86_ssse3_pshuf_b_128>, + SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8, + int_x86_ssse3_pshuf_b>; + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, + int_x86_ssse3_psign_b_128>, + SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8, + int_x86_ssse3_psign_b>; + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, + int_x86_ssse3_psign_w_128>, + SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16, + int_x86_ssse3_psign_w>; + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, + int_x86_ssse3_psign_d_128>, + SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32, + int_x86_ssse3_psign_d>; +} +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, + int_x86_ssse3_pmul_hr_sw_128>, + SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16, + int_x86_ssse3_pmul_hr_sw>; } def : Pat<(X86pshufb VR128:$src, VR128:$mask), @@ -3396,22 +3718,16 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass sse3_palign<string asm, bit Is2Addr = 1> { +multiclass ssse3_palign_mm<string asm> { def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), - !if(Is2Addr, - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - !strconcat(asm, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>; + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - !if(Is2Addr, - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - !strconcat(asm, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>; + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; +} +multiclass ssse3_palign<string asm, bit Is2Addr = 1> { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !if(Is2Addr, @@ -3429,9 +3745,10 @@ multiclass sse3_palign<string asm, bit Is2Addr = 1> { } let isAsmParserOnly = 1, Predicates = [HasAVX] in - defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V; + defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PALIGN : sse3_palign<"palignr">; + defm PALIGN : ssse3_palign<"palignr">, + ssse3_palign_mm<"palignr">; let AddedComplexity = 5 in { @@ -3732,31 +4049,62 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). 
-def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; -def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; -def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; -def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - -def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; +let Predicates = [HasSSE1] in { + def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + +// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter). 
+let Predicates = [HasAVX] in { + def : Pat<(alignedloadv4i32 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv4i32 addr:$src), + (VMOVUPSrm addr:$src)>; + def : Pat<(alignedloadv2i64 addr:$src), + (VMOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (VMOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (VMOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (VMOVUPSmr addr:$dst, VR128:$src)>; +} //===----------------------------------------------------------------------===// // SSE4.1 - Packed Move with Sign/Zero Extend @@ -3923,8 +4271,12 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; + def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX; +} defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -4007,8 +4359,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; + def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, OpSize, VEX; +} defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -4131,80 +4488,84 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; + (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, + Requires<[HasAVX]>; +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, + Requires<[HasSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// -multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, - string OpcodeStr, - Intrinsic V4F32Int, - Intrinsic V2F64Int> { +multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + PatFrag mem_frag32, PatFrag mem_frag64, + Intrinsic V4F32Int, Intrinsic V2F64Int> { // Intrinsic operation, reg. 
// Vector intrinsic operation, reg def PSr_Int : SS4AIi8<opcps, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>, + [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>, OpSize; // Vector intrinsic operation, mem def PSm_Int : Ii8<opcps, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>, + [(set RC:$dst, + (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>, + [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>, OpSize; // Vector intrinsic operation, mem def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>, + [(set RC:$dst, + (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>, OpSize; } -multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd, - string OpcodeStr> { +multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd, + RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { // Intrinsic operation, reg. // Vector intrinsic operation, reg def PSr : SS4AIi8<opcps, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem def PSm : Ii8<opcps, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg def PDr : SS4AIi8<opcpd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem def PDm : SS4AIi8<opcpd, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; @@ -4261,8 +4622,8 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; } -multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd, - string OpcodeStr> { +multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, + string OpcodeStr> { // Intrinsic operation, reg. 
def SSr : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), @@ -4295,24 +4656,90 @@ multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, // FP round - roundss, roundps, roundsd, roundpd let isAsmParserOnly = 1, Predicates = [HasAVX] in { // Intrinsic form - defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>, - VEX; + defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, + memopv4f32, memopv2f64, + int_x86_sse41_round_ps, + int_x86_sse41_round_pd>, VEX; + defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256, + memopv8f32, memopv4f64, + int_x86_avx_round_ps_256, + int_x86_avx_round_pd_256>, VEX; defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", - int_x86_sse41_round_ss, int_x86_sse41_round_sd, - 0>, VEX_4V; + int_x86_sse41_round_ss, + int_x86_sse41_round_sd, 0>, VEX_4V; + // Instructions for the assembler - defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; - defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; + defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, + VEX; + defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, + VEX; + defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V; } -defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, + memopv4f32, memopv2f64, int_x86_sse41_round_ps, int_x86_sse41_round_pd>; let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; //===----------------------------------------------------------------------===// +// SSE4.1 - Packed Bit Test +//===----------------------------------------------------------------------===// + +// The ptest instruction is lowered to this in X86ISelLowering, primarily from +// the Intel intrinsic that corresponds to it. 
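For reference, the source-level pattern that typically reaches these ptest definitions is the SSE4.1 testz intrinsic; a minimal C sketch (assuming <smmintrin.h>; the function name is illustrative only):

#include <smmintrin.h>

/* ptest sets ZF when (a & b) is all zero; _mm_testz_si128 returns that ZF,
   so testing a value against itself asks "is this vector entirely zero?". */
static int is_zero_vector(__m128i v) {
  return _mm_testz_si128(v, v);
}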
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + OpSize, VEX; + +def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, + OpSize, VEX; +def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>, + OpSize, VEX; +} + +let Defs = [EFLAGS] in { +def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + OpSize; +def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + OpSize; +} + +// The bit test instructions below are AVX only +multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { + def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX; + def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, + OpSize, VEX; +} + +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; +defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; +defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; +defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; +} + +//===----------------------------------------------------------------------===// // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// @@ -4431,79 +4858,104 @@ let Constraints = "$src1 = $dst" in /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { + Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { let isCommutable = 1 in - def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, - (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, + [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, OpSize; - def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), + def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, 
i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, + [(set RC:$dst, + (IntId RC:$src1, + (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, OpSize; } let isAsmParserOnly = 1, Predicates = [HasAVX] in { let isCommutable = 0 in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", + int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", + int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; } defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, - 0>, VEX_4V; + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, + VR256, memopv32i8, i256mem, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; - defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; - defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; - defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; + defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, + VR128, memopv16i8, i128mem>; + defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, + VR128, memopv16i8, i128mem>; + defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, + VR128, memopv16i8, i128mem>; + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, + VR128, memopv16i8, i128mem>; } - defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; - defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; + defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, + VR128, memopv16i8, i128mem>; + defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, + VR128, memopv16i8, i128mem>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators let isAsmParserOnly = 1, Predicates = [HasAVX] in { - multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> { - def rr : I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; - - def rm : I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, VR128:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; - } -} - -defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">; -defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">; -defm 
VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">; +multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_frag, Intrinsic IntId> { + def rr : I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], + SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + + def rm : I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [(set RC:$dst, + (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), + RC:$src3))], + SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; +} +} + +defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, + memopv16i8, int_x86_sse41_blendvpd>; +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, + memopv16i8, int_x86_sse41_blendvps>; +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, + memopv16i8, int_x86_sse41_pblendvb>; +defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, + memopv32i8, int_x86_avx_blendv_pd_256>; +defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, + memopv32i8, int_x86_avx_blendv_ps_256>; /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { @@ -4529,30 +4981,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; -// ptest instruction we'll lower to this in X86ISelLowering primarily from -// the intel intrinsic that corresponds to this. 
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { -def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize, VEX; -def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize, VEX; -} - -let Defs = [EFLAGS] in { -def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize; -def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize; -} - let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", @@ -4603,17 +5031,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), //===----------------------------------------------------------------------===// // Packed Compare Implicit Length Strings, Return Mask -let Defs = [EFLAGS], usesCustomInserter = 1 in { - def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "#PCMPISTRM128rr PSEUDO!", +multiclass pseudo_pcmpistrm<string asm> { + def REG : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, - imm:$src3))]>, OpSize; - def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", + imm:$src3))]>; + def MEM : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 - VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize; + VR128:$src1, (load addr:$src2), imm:$src3))]>; +} + +let Defs = [EFLAGS], usesCustomInserter = 1 in { + defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; + defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, @@ -4636,20 +5067,20 @@ let Defs = [XMM0, EFLAGS] in { } // Packed Compare Explicit Length Strings, Return Mask -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { - def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; - - def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", +multiclass pseudo_pcmpestrm<string asm> { + def REG : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"), + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; + def MEM : Ii8<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"), [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, - OpSize; + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; +} + 
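The string-comparison pseudos above are reached from the SSE4.2 intrinsics. A hedged usage sketch (the helper name is hypothetical; the intrinsic, header, and flag names are the standard SSE4.2 ones), which also shows why the explicit-length form lists EAX and EDX in Uses, since the two lengths travel in those registers:

#include <nmmintrin.h>

// Hypothetical helper: _mm_cmpestrm maps to int_x86_sse42_pcmpestrm128 and,
// through the pseudos above, to PCMPESTRM (VPCMPESTRM under AVX). la goes
// in EAX and lb in EDX; the mode must be a compile-time constant, matching
// the i8imm operand of the pseudo.
static __m128i equal_any_mask(__m128i needle, int la, __m128i haystack, int lb) {
  return _mm_cmpestrm(needle, la, haystack, lb,
                      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
}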
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; + defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; } let isAsmParserOnly = 1, Predicates = [HasAVX], @@ -4941,3 +5372,579 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, OpSize; + +//===----------------------------------------------------------------------===// +// CLMUL Instructions +//===----------------------------------------------------------------------===// + +// Only the AVX version of CLMUL instructions are described here. + +// Carry-less Multiplication instructions +let isAsmParserOnly = 1 in { +def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>; + +def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>; + +// Assembler Only +multiclass avx_vpclmul<string asm> { + def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + + def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} +defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">; +defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; +defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; +defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; + +} // isAsmParserOnly + +//===----------------------------------------------------------------------===// +// AVX Instructions +//===----------------------------------------------------------------------===// + +let isAsmParserOnly = 1 in { + +// Load from memory and broadcast to all elements of the destination operand +class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, + X86MemOperand x86memop, Intrinsic Int> : + AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (Int addr:$src))]>, VEX; + +def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, + int_x86_avx_vbroadcastss>; +def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, + int_x86_avx_vbroadcastss_256>; +def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, + int_x86_avx_vbroadcast_sd_256>; +def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, + int_x86_avx_vbroadcastf128_pd_256>; + +// Insert packed floating-point values +def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR128:$src2, i8imm:$src3), + "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; +def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f128mem:$src2, i8imm:$src3), + "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; + +// Extract packed floating-point values +def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i8imm:$src2), + "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, VEX; +def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), + (ins f128mem:$dst, VR256:$src1, i8imm:$src2), + "vextractf128\t{$src2, 
$src1, $dst|$dst, $src1, $src2}", + []>, VEX; + +// Conditional SIMD Packed Loads and Stores +multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, + Intrinsic IntLd, Intrinsic IntLd256, + Intrinsic IntSt, Intrinsic IntSt256, + PatFrag pf128, PatFrag pf256> { + def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, + VEX_4V; + def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, + VEX_4V; + def mr : AVX8I<opc_mr, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; + def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; +} + +defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", + int_x86_avx_maskload_ps, + int_x86_avx_maskload_ps_256, + int_x86_avx_maskstore_ps, + int_x86_avx_maskstore_ps_256, + memopv4f32, memopv8f32>; +defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", + int_x86_avx_maskload_pd, + int_x86_avx_maskload_pd_256, + int_x86_avx_maskstore_pd, + int_x86_avx_maskstore_pd_256, + memopv2f64, memopv4f64>; + +// Permute Floating-Point Values +multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, + RegisterClass RC, X86MemOperand x86memop_f, + X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, + Intrinsic IntVar, Intrinsic IntImm> { + def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V; + def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop_i:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V; + + def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX; + def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), + (ins x86memop_f:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX; +} + +defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, + memopv4f32, memopv4i32, + int_x86_avx_vpermilvar_ps, + int_x86_avx_vpermil_ps>; +defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, + memopv8f32, memopv8i32, + int_x86_avx_vpermilvar_ps_256, + int_x86_avx_vpermil_ps_256>; +defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, + memopv2f64, memopv2i64, + int_x86_avx_vpermilvar_pd, + int_x86_avx_vpermil_pd>; +defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, + memopv4f64, memopv4i64, + int_x86_avx_vpermilvar_pd_256, + int_x86_avx_vpermil_pd_256>; + +def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i8imm:$src3), + "vperm2f128\t{$src3, $src2, $src1, 
$dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; +def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, i8imm:$src3), + "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + []>, VEX_4V; + +// Zero All YMM registers +def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", + [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>; + +// Zero Upper bits of YMM registers +def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", + [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>; + +} // isAsmParserOnly + +def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), + (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; + +def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), + (VEXTRACTF128rr VR256:$src1, imm:$src2)>; + +def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), + (VBROADCASTF128 addr:$src)>; + +def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; + +def : Pat<(int_x86_avx_vperm2f128_ps_256 + VR256:$src1, (memopv8f32 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_pd_256 + VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(int_x86_avx_vperm2f128_si_256 + VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; + +//===----------------------------------------------------------------------===// +// SSE Shuffle pattern fragments +//===----------------------------------------------------------------------===// + +// This is part of a "work in progress" refactoring. The idea is that all +// vector shuffles are going to be translated into target specific nodes and +// directly matched by the patterns below (which can be changed along the way) +// The AVX version of some but not all of them are described here, and more +// should come in a near future. + +// Shuffle with PSHUFD instruction folding loads. The first two patterns match +// SSE2 loads, which are always promoted to v2i64. The last one should match +// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD +// in SSE2, how does it ever worked? Anyway, the pattern will remain here until +// we investigate further. 
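For reference while reading the patterns below, a small self-contained model of what the 8-bit PSHUFD immediate encodes (a sketch, not code from this patch): each 2-bit field of the immediate selects one of the four 32-bit source elements.

#include <cstdint>

// Reference model of PSHUFD: dst[i] = src[imm bits 2i+1..2i].
static void pshufd_ref(uint32_t dst[4], const uint32_t src[4], uint8_t imm) {
  for (int i = 0; i < 4; ++i)
    dst[i] = src[(imm >> (2 * i)) & 0x3];
}

For example, imm = 0x1B selects elements 3,2,1,0 and therefore reverses the vector, which is the classic shuffle(3,2,1,0) encoding.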
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; +def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), + (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked? + +// Shuffle with PSHUFD instruction. +def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + +def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; + +// Shuffle with SHUFPD instruction. +def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Shufps VR128:$src1, + (memopv2f64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + +def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + +def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + +// Shuffle with SHUFPS instruction. 
+def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Shufps VR128:$src1, + (memopv4f32 addr:$src2), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + +def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + +def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86Shufps VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + +def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>; +def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; + +// Shuffle with MOVHLPS instruction +def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), + (MOVHLPSrr VR128:$src1, VR128:$src2)>; + +// Shuffle with MOVDDUP instruction +def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (memopv2f64 addr:$src)), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (memopv2i64 addr:$src)), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (memopv2i64 addr:$src)), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), + (MOVDDUPrm addr:$src)>; + +def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; +def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (MOVDDUPrm addr:$src)>; + +// Shuffle with UNPCKLPS +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKLPSrm VR128:$src1, addr:$src2)>; + +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + (UNPCKLPSrr VR128:$src1, VR128:$src2)>; + +// Shuffle with UNPCKHPS +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + (UNPCKHPSrm VR128:$src1, addr:$src2)>; + +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f32 (X86Unpckhps VR128:$src1, 
VR128:$src2)), + (UNPCKHPSrr VR128:$src1, VR128:$src2)>; + +// Shuffle with UNPCKLPD +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKLPDrm VR128:$src1, addr:$src2)>; + +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + (UNPCKLPDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with UNPCKHPD +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + (UNPCKHPDrm VR128:$src1, addr:$src2)>; + +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + (UNPCKHPDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLBW +def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), + (PUNPCKLBWrm VR128:$src1, addr:$src2)>; +def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)), + (PUNPCKLBWrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLWD +def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), + (PUNPCKLWDrm VR128:$src1, addr:$src2)>; +def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)), + (PUNPCKLWDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLDQ +def : Pat<(v4i32 (X86Punpckldq VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (PUNPCKLDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)), + (PUNPCKLDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKLQDQ +def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))), + (PUNPCKLQDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)), + (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHBW +def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), + (PUNPCKHBWrm VR128:$src1, addr:$src2)>; +def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)), + (PUNPCKHBWrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHWD +def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), + (PUNPCKHWDrm VR128:$src1, addr:$src2)>; +def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)), + (PUNPCKHWDrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHDQ +def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), + (PUNPCKHDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)), + (PUNPCKHDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with PUNPCKHQDQ +def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))), + (PUNPCKHQDQrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)), + (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>; + +// Shuffle with MOVLHPS +def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86Movlhps VR128:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; +def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), +
(MOVLHPSrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), + (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; + +// Shuffle with MOVLHPD +def : Pat<(v2f64 (X86Movlhpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; +// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the +// problem is during lowering, where it's not possible to recognize the load +// fold because it has two uses through a bitcast. One use disappears at isel +// time and the fold opportunity reappears. +def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVHPDrm VR128:$src1, addr:$src2)>; + +// Shuffle with MOVSS +def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (MOVSSrr VR128:$src1, FR32:$src2)>; +def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; +def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), + (MOVSSrr (v4f32 VR128:$src1), + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +// FIXME: Instead of an X86Movss there should be an X86Movlps here; the +// problem is during lowering, where it's not possible to recognize the load +// fold because it has two uses through a bitcast. One use disappears at isel +// time and the fold opportunity reappears. +def : Pat<(X86Movss VR128:$src1, + (bc_v4i32 (v2i64 (load addr:$src2)))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + +// Shuffle with MOVSD +def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (MOVSDrr VR128:$src1, FR64:$src2)>; +def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; +def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; +def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; +def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; + +// Shuffle with MOVSHDUP +def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; +def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))), + (MOVSHDUPrm addr:$src)>; + +def : Pat<(v4f32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; +def : Pat<(X86Movshdup (memopv4f32 addr:$src)), + (MOVSHDUPrm addr:$src)>; + +// Shuffle with MOVSLDUP +def : Pat<(v4i32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; +def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))), + (MOVSLDUPrm addr:$src)>; + +def : Pat<(v4f32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; +def : Pat<(X86Movsldup (memopv4f32 addr:$src)), + (MOVSLDUPrm addr:$src)>; + +// Shuffle with PSHUFHW +def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))), + (PSHUFHWmi addr:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), + (PSHUFHWri VR128:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), + (PSHUFHWmi addr:$src, imm:$imm)>; + +// Shuffle with PSHUFLW +def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))), + (PSHUFLWmi addr:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), + (PSHUFLWri VR128:$src, imm:$imm)>; +def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), + (PSHUFLWmi addr:$src, imm:$imm)>; + +// Shuffle with PALIGN +def :
Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; +def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), + (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; + +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + +// Shuffle with MOVLPS +def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; +def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), + (MOVLPSrm VR128:$src1, addr:$src2)>; +def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (MOVLPSrm VR128:$src1, addr:$src2)>; + +// Shuffle with MOVLPD +def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), + (MOVLPDrm VR128:$src1, addr:$src2)>; +def : Pat<(v2f64 (X86Movlpd VR128:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (MOVLPDrm VR128:$src1, addr:$src2)>; + +// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD +def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (MOVHPSmr addr:$dst, VR128:$src)>; +def : Pat<(store (f64 (vector_extract + (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (MOVHPDmr addr:$dst, VR128:$src)>; + +def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; +def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; + +def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; +def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1), + (MOVLPDmr addr:$src1, VR128:$src2)>; diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 2b8720bac3438..36badb403e815 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -103,6 +103,9 @@ getNonexecutableStackSection(MCContext &Ctx) const { } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { + if (Triple.getArch() == Triple::x86_64) + GlobalPrefix = ""; + AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 23b0666f5f30f..9564fe0b92d4c 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -365,7 +365,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, const TargetInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; - if (TSFlags & X86II::VEX_4V) + if ((TSFlags >> 32) & X86II::VEX_4V) HasVEX_4V = true; // VEX_R: opcode externsion equivalent to REX.R 
in @@ -429,10 +429,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::OpSize) VEX_PP = 0x01; - if (TSFlags & X86II::VEX_W) + if ((TSFlags >> 32) & X86II::VEX_W) VEX_W = 1; - if (TSFlags & X86II::VEX_L) + if ((TSFlags >> 32) & X86II::VEX_L) VEX_L = 1; switch (TSFlags & X86II::Op0Mask) { @@ -469,33 +469,39 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, unsigned NumOps = MI.getNumOperands(); unsigned CurOp = 0; + bool IsDestMem = false; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMDestMem: + IsDestMem = true; + // The important info for the VEX prefix is never beyond the address + // registers. Don't check beyond that. + NumOps = CurOp = X86::AddrNumOperands; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: - case X86II::MRMDestMem: - NumOps = CurOp = X86::AddrNumOperands; case X86II::MRMSrcMem: case X86II::MRMSrcReg: if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; - - // CurOp and NumOps are equal when VEX_R represents a register used - // to index a memory destination (which is the last operand) - CurOp = (CurOp == NumOps) ? 0 : CurOp+1; + CurOp++; if (HasVEX_4V) { - VEX_4V = getVEXRegisterEncoding(MI, CurOp); + VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp); CurOp++; } + // To check only operands before the memory address ones, start + // the search from the beginning + if (IsDestMem) + CurOp = 0; + // If the last register should be encoded in the immediate field // do not use any bit from VEX prefix to this register, ignore it - if (TSFlags & X86II::VEX_I8IMM) + if ((TSFlags >> 32) & X86II::VEX_I8IMM) NumOps--; for (; CurOp != NumOps; ++CurOp) { @@ -508,7 +514,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; } break; - default: // MRMDestReg, MRM0r-MRM7r + default: // MRMDestReg, MRM0r-MRM7r, RawFrm + if (!MI.getNumOperands()) + break; + if (MI.getOperand(CurOp).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0; @@ -524,7 +533,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_R = 0x0; } break; - assert(0 && "Not implemented!"); } // Emit segment override opcode prefix as needed. @@ -793,9 +801,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? bool HasVEX_4V = false; - if (TSFlags & X86II::VEX) + if ((TSFlags >> 32) & X86II::VEX) HasVEXPrefix = true; - if (TSFlags & X86II::VEX_4V) + if ((TSFlags >> 32) & X86II::VEX_4V) HasVEX_4V = true; // Determine where the memory operand starts, if present.
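The recurring change from "TSFlags & X86II::VEX_*" to "(TSFlags >> 32) & X86II::VEX_*" in these hunks reflects that the VEX attribute bits now live in the upper half of the 64-bit TSFlags word. A self-contained sketch of why the shift matters; the bit positions below are assumed for illustration, not the real X86II values:

#include <cassert>
#include <cstdint>

enum : uint64_t {
  OpSize = 1ull << 6,  // assumed low-word attribute bit
  VEX_4V = 1ull << 1,  // assumed VEX-group bit, stored at TSFlags bit 33
};

static bool hasVEX_4V(uint64_t TSFlags) {
  // The VEX group reuses small constant values that collide with low-word
  // fields, so the upper word must be selected before masking.
  return (TSFlags >> 32) & VEX_4V;
}

int main() {
  uint64_t TSFlags = OpSize | (VEX_4V << 32);
  assert(hasVEX_4V(TSFlags));       // correct: tests the upper word
  assert((TSFlags & VEX_4V) == 0);  // a naive low-word test misses the bit
}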
@@ -819,6 +827,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; + + case X86II::RawFrmImm16: + EmitByte(BaseOpcode, CurByte, OS); + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups); + break; case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); @@ -833,10 +849,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + X86::AddrNumOperands; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + EmitMemModRMByte(MI, CurOp, - GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)), + GetX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, CurByte, OS, Fixups); - CurOp += X86::AddrNumOperands + 1; + CurOp = SrcRegNum + 1; break; case X86II::MRMSrcReg: @@ -934,7 +955,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (CurOp != NumOps) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. - if (TSFlags & X86II::VEX_I8IMM) { + if ((TSFlags >> 32) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(CurOp++); bool IsExtReg = X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index e67fc06a6cd75..8c4620f921771 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -16,7 +16,6 @@ #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" #include "X86MCAsmInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -29,21 +28,19 @@ #include "llvm/Type.h" using namespace llvm; - -const X86Subtarget &X86MCInstLower::getSubtarget() const { - return AsmPrinter.getSubtarget(); -} +X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf, + X86AsmPrinter &asmprinter) +: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()), + MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {} MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { - assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin"); - return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); + return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>(); } MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering(); - return static_cast<const X86TargetLowering*>(TLI)-> - getPICBaseSymbol(AsmPrinter.MF, Ctx); + return static_cast<const X86TargetLowering*>(TM.getTargetLowering())-> + getPICBaseSymbol(&MF, Ctx); } /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol @@ -56,7 +53,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { if (!MO.isGlobal()) { assert(MO.isSymbol()); - Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MAI.getGlobalPrefix(); Name += MO.getSymbolName(); } else { const GlobalValue *GV = MO.getGlobal(); @@ -91,7 +88,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()), + StubValueTy(Mang->getSymbol(MO.getGlobal()), 
!MO.getGlobal()->hasInternalLinkage()); } return Sym; @@ -105,7 +102,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()), + StubValueTy(Mang->getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } return Sym; @@ -121,7 +118,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()), + StubValueTy(Mang->getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { Name.erase(Name.end()-5, Name.end()); @@ -178,7 +175,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), Ctx); - if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) { + if (MO.isJTI() && MAI.hasSetDirective()) { // If .set directive is supported, use it to reduce the number of // relocations the assembler will generate for differences between // local labels. This is only safe when the symbols are in the same @@ -255,7 +252,13 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { } /// \brief Simplify things like MOV32rm to MOV32o32a. -static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { +static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, + unsigned Opcode) { + // Don't make these simplifications in 64-bit mode; other assemblers don't + // perform them because they make the code larger. + if (Printer.getSubtarget().is64Bit()) + return; + bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg(); unsigned AddrBase = IsStore; unsigned RegOp = IsStore ? 
0 : 5; @@ -336,7 +339,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_BlockAddress: MCOp = LowerSymbolOperand(MO, - AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); + AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); break; } @@ -377,12 +380,17 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; case X86::MMX_V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; - case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; - case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; - case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; + case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break; + case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; + case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break; + case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; + case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 @@ -393,12 +401,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have // register inputs modeled as normal uses instead of implicit uses. As such, // truncate off all but the first operand (the callee). FIXME: Change isel. 
case X86::TAILJMPr64: case X86::CALL64r: - case X86::CALL64pcrel32: { + case X86::CALL64pcrel32: + case X86::WINCALL64r: + case X86::WINCALL64pcrel32: { unsigned Opcode = OutMI.getOpcode(); MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); @@ -456,15 +466,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // MOV64ao8, MOV64o8a // XCHG16ar, XCHG32ar, XCHG64ar case X86::MOV8mr_NOREX: - case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break; + case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break; case X86::MOV8rm_NOREX: - case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break; - case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break; - case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break; - case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break; - case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break; - case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break; - case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break; + case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break; + case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break; + case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break; + case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break; + case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break; case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break; case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break; @@ -505,46 +513,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &O) { - // Only the target-dependent form of DBG_VALUE should get here. - // Referencing the offset and metadata as NOps-2 and NOps-1 is - // probably portable to other targets; frame pointer location is not. - unsigned NOps = MI->getNumOperands(); - assert(NOps==7); - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); - if (V.getContext().isSubprogram()) - O << DISubprogram(V.getContext()).getDisplayName() << ":"; - O << V.getName(); - O << " <- "; - // Frame address. Currently handles register +- offset only. - O << '['; - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - printOperand(MI, 0, O); - else - O << "undef"; - O << '+'; printOperand(MI, 3, O); - O << ']'; - O << "+"; - printOperand(MI, NOps-2, O); -} - -MachineLocation -X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. 
- - if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); - return Location; -} - void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { - X86MCInstLower MCInstLowering(OutContext, Mang, *this); + X86MCInstLower MCInstLowering(Mang, *MF, *this); switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: if (isVerbose() && OutStreamer.hasRawTextSupport()) { @@ -555,6 +526,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; + // Emit nothing here but a comment if we can. + case X86::Int_MemBarrier: + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER")); + return; + case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 9e5474fc81b33..539b09be6fd74 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -13,27 +13,30 @@ #include "llvm/Support/Compiler.h" namespace llvm { + class MCAsmInfo; class MCContext; class MCInst; class MCOperand; class MCSymbol; class MachineInstr; + class MachineFunction; class MachineModuleInfoMachO; class MachineOperand; class Mangler; + class TargetMachine; class X86AsmPrinter; - class X86Subtarget; /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; Mangler *Mang; + const MachineFunction &MF; + const TargetMachine &TM; + const MCAsmInfo &MAI; X86AsmPrinter &AsmPrinter; - - const X86Subtarget &getSubtarget() const; public: - X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter) - : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {} + X86MCInstLower(Mangler *mang, const MachineFunction &MF, + X86AsmPrinter &asmprinter); void Lower(const MachineInstr *MI, MCInst &OutMI) const; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 5f31e00ebabd8..fedd49ebb5403 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -38,8 +38,15 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +ForceStackAlign("force-align-stack", + cl::desc("Force align the stack to the minimum alignment" + " needed for the function."), + cl::init(false), cl::Hidden); + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ? 
@@ -193,6 +200,12 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::DR7: return 7; + // Pseudo index registers are equivalent to a "none" + // scaled index (See Intel Manual 2A, table 2-3) + case X86::EIZ: + case X86::RIZ: + return 4; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -456,26 +469,29 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - bool requiresRealignment = - RealignStack && ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttr(Attribute::StackAlignment)); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. - // FIXME: Temporary disable the error - it seems to be too conservative. + // FIXME: It's more complicated than this... if (0 && requiresRealignment && MFI->hasVarSizedObjects()) report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); - - return (requiresRealignment && !MFI->hasVarSizedObjects()); + + // If we've requested that we force align the stack do so now. + if (ForceStackAlign) + return canRealignStack(MF); + + return requiresRealignment && canRealignStack(MF); } -bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { +bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } -bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, - int &FrameIdx) const { +bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { if (Reg == FramePtr && hasFP(MF)) { FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); return true; @@ -610,10 +626,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const{ + int SPAdj, RegScavenger *RS) const{ assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -660,7 +675,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } - return 0; } void @@ -750,7 +764,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. +/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -901,6 +915,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { bool HasFP = hasFP(MF); DebugLoc DL; + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum SlotSize. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? 
StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) @@ -979,7 +1004,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { @@ -1007,7 +1032,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); @@ -1047,7 +1072,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (!HasFP && needsFrameMoves) { // Mark callee-saved push instruction. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. unsigned Ptr = StackSize ? @@ -1062,7 +1087,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { DL = MBB.findDebugLoc(MBBI); // Adjust stack pointer: ESP -= numbytes. - if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + + // Windows and cygwin/mingw require a prologue helper routine when allocating + // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw + // uses __alloca. __alloca and the 32-bit version of __chkstk will probe + // the stack and adjust the stack pointer in one go. The 64-bit version + // of __chkstk is only responsible for probing the stack. The 64-bit + // prologue is responsible for adjusting the stack pointer. Touching the + // stack at 4K increments is necessary to ensure that the guard pages used + // by the OS virtual memory manager are allocated in correct sequence. + if (NumBytes >= 4096 && + (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) { // Check, whether EAX is livein for this function. bool isEAXAlive = false; for (MachineRegisterInfo::livein_iterator @@ -1073,16 +1108,16 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { Reg == X86::AH || Reg == X86::AL); } - // Function prologue calls _alloca to probe the stack when allocating more - // than 4k bytes in one go. Touching the stack at 4K increments is necessary - // to ensure that the guard pages used by the OS virtual memory manager are - // allocated in correct sequence. + + const char *StackProbeSymbol = + Subtarget->isTargetWindows() ? 
"_chkstk" : "_alloca"; if (!isEAXAlive) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca") - .addReg(StackPtr, RegState::Define | RegState::Implicit); + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) @@ -1093,8 +1128,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca") - .addReg(StackPtr, RegState::Define | RegState::Implicit); + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), @@ -1119,7 +1155,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if ((NumBytes || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -1172,6 +1208,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else + MaxAlign = MaxAlign ? MaxAlign : 4; + } + if (hasFP(MF)) { // Calculate required stack adjustment. 
uint64_t FrameSize = StackSize - SlotSize; @@ -1519,7 +1566,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { namespace { struct MSAH : public MachineFunctionPass { static char ID; - MSAH() : MachineFunctionPass(&ID) {} + MSAH() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF) { const X86TargetMachine *TM = diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index d852bcd2011c4..527df05c58fce 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -117,18 +117,17 @@ public: bool needsStackRealignment(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; - bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 9f0382e3fae91..95269b15760e0 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -241,6 +241,10 @@ let Namespace = "X86" in { def CR6 : Register<"cr6">; def CR7 : Register<"cr7">; def CR8 : Register<"cr8">; + + // Pseudo index registers + def EIZ : Register<"eiz">; + def RIZ : Register<"riz">; } @@ -804,7 +808,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, }]; } -def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, +def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { @@ -829,4 +833,15 @@ def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { let CopyCost = -1; // Don't allow copying of status registers. + + // EFLAGS is not allocatable. + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + CCRClass::iterator + CCRClass::allocation_order_end(const MachineFunction &MF) const { + return allocation_order_begin(MF); + } + }]; } diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h new file mode 100644 index 0000000000000..df040520bc8f5 --- /dev/null +++ b/lib/Target/X86/X86ShuffleDecode.h @@ -0,0 +1,155 @@ +//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86_SHUFFLE_DECODE_H +#define X86_SHUFFLE_DECODE_H + +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +enum { + SM_SentinelZero = ~0U +}; + +static inline +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { + // Defaults the copying the dest value. + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + + // Decode the immediate. + unsigned ZMask = Imm & 15; + unsigned CountD = (Imm >> 4) & 3; + unsigned CountS = (Imm >> 6) & 3; + + // CountS selects which input element to use. + unsigned InVal = 4+CountS; + // CountD specifies which element of destination to update. + ShuffleMask[CountD] = InVal; + // ZMask zaps values, potentially overriding the CountD elt. + if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; + if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; + if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; + if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; +} + +// <3,1> or <6,7,2,3> +static void DecodeMOVHLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = NElts/2; i != NElts; ++i) + ShuffleMask.push_back(NElts+i); + + for (unsigned i = NElts/2; i != NElts; ++i) + ShuffleMask.push_back(i); +} + +// <0,2> or <0,1,4,5> +static void DecodeMOVLHPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) + ShuffleMask.push_back(i); + + for (unsigned i = 0; i != NElts/2; ++i) + ShuffleMask.push_back(NElts+i); +} + +static void DecodePSHUFMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts; ++i) { + ShuffleMask.push_back(Imm % NElts); + Imm /= NElts; + } +} + +static void DecodePSHUFHWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back(4+(Imm & 3)); + Imm >>= 2; + } +} + +static void DecodePSHUFLWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back((Imm & 3)); + Imm >>= 2; + } + ShuffleMask.push_back(4); + ShuffleMask.push_back(5); + ShuffleMask.push_back(6); + ShuffleMask.push_back(7); +} + +static void DecodePUNPCKLMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i); + ShuffleMask.push_back(i+NElts); + } +} + +static void DecodePUNPCKHMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i+NElts/2); + ShuffleMask.push_back(i+NElts+NElts/2); + } +} + +static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + // Part that reads from dest. + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(Imm % NElts); + Imm /= NElts; + } + // Part that reads from src. 
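// Worked example: with NElts == 4 and Imm == 0x1B (binary 00011011), the
// loop above pushed <3,2> from the first operand and the loop below pushes
// <5,4> from the second, giving the classic shufps $0x1B mask <3,2,5,4>.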
+ for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(Imm % NElts + NElts); + Imm /= NElts; + } +} + +static void DecodeUNPCKHPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i+NElts/2); // Reads from dest + ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src + } +} + + +/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// etc. NElts indicates the number of elements in the vector allowing it to +/// handle different datatypes and vector widths. +static void DecodeUNPCKLPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i); // Reads from dest + ShuffleMask.push_back(i+NElts); // Reads from src + } +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 4a10be518f03f..0d02e5ee472bb 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -73,7 +73,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDefaultVisibility() && (isDecl || GV->isWeakForLinker())) return X86II::MO_GOTPCREL; - } else { + } else if (!isTargetWin64()) { assert(isTargetELF() && "Unknown rip-relative target"); // Extra load is needed for all externally visible. @@ -260,9 +260,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); - HasAVX = ((ECX >> 28) & 0x1); - HasAES = IsIntel && ((ECX >> 25) & 0x1); + HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); + HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); + HasAVX = ((ECX >> 28) & 0x1); + HasAES = IsIntel && ((ECX >> 25) & 0x1); if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. 
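The feature flags assigned above are bits of ECX as returned by CPUID leaf 1 (CLMUL is bit 1, FMA3 bit 12, AES bit 25, AVX bit 28). A self-contained sketch of the same tests, assuming an x86 host and GCC/Clang inline assembly; the names here are illustrative:

#include <cstdint>

struct DemoFeatures { bool clmul, fma3, aes, avx; };

static DemoFeatures detectDemoFeatures() {
  uint32_t eax = 1, ebx, ecx, edx;
  // CPUID with EAX=1 reports feature flags in ECX and EDX.
  __asm__("cpuid" : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));
  DemoFeatures F;
  F.clmul = (ecx >> 1) & 1;   // PCLMULQDQ
  F.fma3  = (ecx >> 12) & 1;  // FMA
  F.aes   = (ecx >> 25) & 1;  // AES-NI
  F.avx   = (ecx >> 28) & 1;  // AVX
  return F;
}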
@@ -291,6 +292,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasSSE4A(false) , HasAVX(false) , HasAES(false) + , HasCLMUL(false) , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 486dbc4e2e900..0ee91abe21f4e 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -74,6 +74,9 @@ protected: /// HasAES - Target has AES instructions bool HasAES; + /// HasCLMUL - Target has carry-less multiplication + bool HasCLMUL; + /// HasFMA3 - Target has 3-operand fused multiply-add bool HasFMA3; @@ -149,6 +152,7 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasAVX() const { return HasAVX; } bool hasAES() const { return HasAES; } + bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } @@ -182,6 +186,10 @@ public: return Is64Bit && (isTargetMingw() || isTargetWindows()); } + bool isTargetWin32() const { + return !Is64Bit && (isTargetMingw() || isTargetWindows()); + } + std::string getDataLayout() const { const char *p; if (is64Bit()) diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index df00d3ffcc791..ce8636eb72b54 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -46,8 +46,15 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, bool RelaxAll) { Triple TheTriple(TT); switch (TheTriple.getOS()) { - default: + case Triple::Darwin: return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + case Triple::MinGW32: + case Triple::MinGW64: + case Triple::Cygwin: + case Triple::Win32: + return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); + default: + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); } } @@ -105,15 +112,21 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); - + // If no relocation model was picked, default as appropriate for the target. if (getRelocationModel() == Reloc::Default) { - if (!Subtarget.isTargetDarwin()) - setRelocationModel(Reloc::Static); - else if (Subtarget.is64Bit()) + // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. + // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we + // use static relocation model by default. + if (Subtarget.isTargetDarwin()) { + if (Subtarget.is64Bit()) + setRelocationModel(Reloc::PIC_); + else + setRelocationModel(Reloc::DynamicNoPIC); + } else if (Subtarget.isTargetWin64()) setRelocationModel(Reloc::PIC_); else - setRelocationModel(Reloc::DynamicNoPIC); + setRelocationModel(Reloc::Static); } assert(getRelocationModel() != Reloc::Default && @@ -136,29 +149,27 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, Subtarget.isTargetDarwin() && is64Bit) setRelocationModel(Reloc::PIC_); - + // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. Subtarget.setPICStyle(PICStyles::None); + } else if (Subtarget.is64Bit()) { + // PIC in 64 bit mode is always rip-rel. 
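// (Concretely, rip-relative addressing encodes data references as a signed
//  32-bit displacement from the instruction pointer, e.g.
//      movq foo@GOTPCREL(%rip), %rax
//  so no base register needs to be materialized for PIC on x86-64.)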
+ Subtarget.setPICStyle(PICStyles::RIPRel); } else if (Subtarget.isTargetCygMing()) { Subtarget.setPICStyle(PICStyles::None); } else if (Subtarget.isTargetDarwin()) { - if (Subtarget.is64Bit()) - Subtarget.setPICStyle(PICStyles::RIPRel); - else if (getRelocationModel() == Reloc::PIC_) + if (getRelocationModel() == Reloc::PIC_) Subtarget.setPICStyle(PICStyles::StubPIC); else { assert(getRelocationModel() == Reloc::DynamicNoPIC); Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC); } } else if (Subtarget.isTargetELF()) { - if (Subtarget.is64Bit()) - Subtarget.setPICStyle(PICStyles::RIPRel); - else - Subtarget.setPICStyle(PICStyles::GOT); + Subtarget.setPICStyle(PICStyles::GOT); } - + // Finally, if we have "none" as our PIC style, force to static mode. if (Subtarget.getPICStyle() == PICStyles::None) setRelocationModel(Reloc::Static); @@ -182,9 +193,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. - PM.add(createX87FPRegKillInserterPass()); - PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 6656bdc10eae4..8f06dd32662f2 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -264,15 +264,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); - + // Check for mov mnemonic - unsigned src, dst, srcSR, dstSR; - if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) { - O << "\tmov " << getRegisterName(dst) << ", "; - O << getRegisterName(src); - } else { + if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm()) + O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " + << getRegisterName(MI->getOperand(1).getReg()); + else printInstruction(MI, O); - } OutStreamer.EmitRawText(O.str()); } diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 1b8e7edfc7ca9..38b35d7666c0e 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -10,7 +10,7 @@ tablegen(XCoreGenDAGISel.inc -gen-dag-isel) tablegen(XCoreGenCallingConv.inc -gen-callingconv) tablegen(XCoreGenSubtarget.inc -gen-subtarget) -add_llvm_target(XCore +add_llvm_target(XCoreCodeGen XCoreFrameInfo.cpp XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 5564ddf133eaf..755ece7e9abac 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -56,6 +56,17 @@ namespace { return CurDAG->getTargetConstant(Imm, MVT::i32); } + inline bool immMskBitp(SDNode *inN) const { + ConstantSDNode *N = cast<ConstantSDNode>(inN); + uint32_t value = (uint32_t)N->getZExtValue(); + if (!isMask_32(value)) { + return false; + } + int msksize = 32 - CountLeadingZeros_32(value); + return (msksize >= 1 && msksize <= 8) || + msksize == 16 || msksize == 24 || msksize == 32; + } + // Complex Pattern Selectors. 
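// (A note on immMskBitp above: it accepts contiguous low masks whose width
//  is 1-8, 16, 24 or 32 bits, so 0xFF and 0xFFFF qualify, while 0x3FF, a
//  10-bit mask, and 0xF0, which is not a low mask, are both rejected.)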
bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); @@ -151,17 +162,15 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; case ISD::Constant: { - if (Predicate_immMskBitp(N)) { + uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue(); + if (immMskBitp(N)) { // Transformation function: get the size of a mask - int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue(); - assert(isMask_32(MaskVal)); // Look for the first non-zero bit - SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal)); + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val)); return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize); } - else if (! Predicate_immU16(N)) { - unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); + else if (!isUInt<16>(Val)) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index dd90ea9767705..ad00046af17de 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -46,33 +46,6 @@ static bool isZeroImm(const MachineOperand &op) { return op.isImm() && op.getImm() == 0; } -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -/// -bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSR, unsigned &DstSR) const { - SrcSR = DstSR = 0; // No sub-registers. - - // We look for 4 kinds of patterns here: - // add dst, src, 0 - // sub dst, src, 0 - // or dst, src, src - // and dst, src, src - if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus) - && isZeroImm(MI.getOperand(2))) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r) - && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) { - DstReg = MI.getOperand(0).getReg(); - SrcReg = MI.getOperand(1).getReg(); - return true; - } - return false; -} - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If @@ -437,7 +410,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, it->getFrameIdx(), RC, &RI); if (emitFrameMoves) { MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel); + BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel); XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it)); } } diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index e5b0171579fce..d2b116eef0d8f 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -30,12 +30,6 @@ public: /// virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; } - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. 
- virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 19b9b1f8c00c6..6b3b39ba1d494 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -140,17 +140,7 @@ def immU20 : PatLeaf<(imm), [{ return (uint32_t)N->getZExtValue() < (1 << 20); }]>; -def immMskBitp : PatLeaf<(imm), [{ - uint32_t value = (uint32_t)N->getZExtValue(); - if (!isMask_32(value)) { - return false; - } - int msksize = 32 - CountLeadingZeros_32(value); - return (msksize >= 1 && msksize <= 8) - || msksize == 16 - || msksize == 24 - || msksize == 32; -}]>; +def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>; def immBitp : PatLeaf<(imm), [{ uint32_t value = (uint32_t)N->getZExtValue(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 2a88342180e40..f82e59814e775 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -155,10 +155,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -unsigned +void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value, - RegScavenger *RS) const { + int SPAdj, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); @@ -291,7 +290,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Erase old instruction. MBB.erase(II); - return 0; } void @@ -420,7 +418,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { // Show update of SP. MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); @@ -439,7 +437,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (emitFrameMoves) { MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveLRLabel); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); MachineLocation CSSrc(XCore::LR); MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc)); @@ -455,7 +453,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { MBB.addLiveIn(XCore::R10); if (emitFrameMoves) { MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveR10Label); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); MachineLocation CSSrc(XCore::R10); MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc)); @@ -467,7 +465,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (emitFrameMoves) { // Show FP is now valid. 
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); MachineLocation SPDst(FramePtr); MachineLocation SPSrc(MachineLocation::VirtualFP); MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc)); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 66132ba8ff66f..e636c1c7298aa 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -54,9 +54,8 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, FrameIndexValue *Value = NULL, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index abfa514e20cfe..838d5505490f5 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -25,7 +25,7 @@ namespace { // Hello - The first implementation, without getAnalysisUsage. struct Hello : public FunctionPass { static char ID; // Pass identification, replacement for typeid - Hello() : FunctionPass(&ID) {} + Hello() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { ++HelloCounter; @@ -37,13 +37,13 @@ namespace { } char Hello::ID = 0; -static RegisterPass<Hello> X("hello", "Hello World Pass"); +INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false); namespace { // Hello2 - The second implementation with getAnalysisUsage implemented. struct Hello2 : public FunctionPass { static char ID; // Pass identification, replacement for typeid - Hello2() : FunctionPass(&ID) {} + Hello2() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { ++HelloCounter; @@ -60,5 +60,6 @@ namespace { } char Hello2::ID = 0; -static RegisterPass<Hello2> -Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)"); +INITIALIZE_PASS(Hello2, "hello2", + "Hello World Pass (with getAnalysisUsage implemented)", + false, false); diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 28ea079abd360..0c77e1fd8cff4 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -67,7 +67,7 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(&ID), maxElements(maxElements) {} + : CallGraphSCCPass(ID), maxElements(maxElements) {} /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; @@ -84,8 +84,8 @@ namespace { } char ArgPromotion::ID = 0; -static RegisterPass<ArgPromotion> -X("argpromotion", "Promote 'by reference' arguments to scalars"); +INITIALIZE_PASS(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false); Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); @@ -208,8 +208,8 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) { // have direct callees. 
for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end(); UI != E; ++UI) { - CallSite CS = CallSite::get(*UI); - assert(CS.getInstruction() && "Should only have direct calls!"); + CallSite CS(*UI); + assert(CS && "Should only have direct calls!"); if (!IsAlwaysValidPointer(CS.getArgument(ArgNo))) return false; @@ -619,14 +619,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); - // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value*, 16> Args; while (!F->use_empty()) { - CallSite CS = CallSite::get(F->use_back()); + CallSite CS(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr &CallPAL = CS.getAttributes(); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 3c05f88027a62..64e8d792dc3ad 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,10 +19,12 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" +#include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -31,7 +33,7 @@ STATISTIC(NumMerged, "Number of global constants merged"); namespace { struct ConstantMerge : public ModulePass { static char ID; // Pass identification, replacement for typeid - ConstantMerge() : ModulePass(&ID) {} + ConstantMerge() : ModulePass(ID) {} // run - For this pass, process all of the globals in the module, // eliminating duplicate constants. @@ -41,12 +43,32 @@ namespace { } char ConstantMerge::ID = 0; -static RegisterPass<ConstantMerge> -X("constmerge", "Merge Duplicate Global Constants"); +INITIALIZE_PASS(ConstantMerge, "constmerge", + "Merge Duplicate Global Constants", false, false); ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } + + +/// Find values that are marked as llvm.used. +static void FindUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet<const GlobalValue*, 8> &UsedValues) { + if (LLVMUsed == 0) return; + ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); + if (Inits == 0) return; + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + bool ConstantMerge::runOnModule(Module &M) { + // Find all the globals that are marked "used". These cannot be merged. + SmallPtrSet<const GlobalValue*, 8> UsedGlobals; + FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); + FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); + // Map unique constant/section pairs to globals. We don't want to merge // globals in different sections. DenseMap<Constant*, GlobalVariable*> CMap; @@ -79,9 +101,13 @@ bool ConstantMerge::runOnModule(Module &M) { // Only process constants with initializers in the default addres space. if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || - GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() || + // Don't touch values marked with attribute(used). 
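// (UsedGlobals was filled from @llvm.used / @llvm.compiler.used above; in
//  IR such a marker looks like, for example:
//      @llvm.used = appending global [1 x i8*]
//                   [i8* bitcast (i32* @g to i8*)], section "llvm.metadata"
//  where @g is whatever global carries attribute((used)).)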
+ UsedGlobals.count(GV)) continue; + + Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 475eee8b19e4b..47df235424e2f 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -122,11 +122,11 @@ namespace { protected: // DAH uses this to specify a different ID. - explicit DAE(void *ID) : ModulePass(ID) {} + explicit DAE(char &ID) : ModulePass(ID) {} public: static char ID; // Pass identification, replacement for typeid - DAE() : ModulePass(&ID) {} + DAE() : ModulePass(ID) {} bool runOnModule(Module &M); @@ -151,8 +151,7 @@ namespace { char DAE::ID = 0; -static RegisterPass<DAE> -X("deadargelim", "Dead Argument Elimination"); +INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false); namespace { /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but @@ -160,15 +159,16 @@ namespace { /// by bugpoint. struct DAH : public DAE { static char ID; - DAH() : DAE(&ID) {} + DAH() : DAE(ID) {} virtual bool ShouldHackArguments() const { return true; } }; } char DAH::ID = 0; -static RegisterPass<DAH> -Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)"); +INITIALIZE_PASS(DAH, "deadarghaX0r", + "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", + false, false); /// createDeadArgEliminationPass - This pass removes arguments from functions /// which are not used by the body of the function. @@ -220,11 +220,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // std::vector<Value*> Args; while (!Fn.use_empty()) { - CallSite CS = CallSite::get(Fn.use_back()); + CallSite CS(Fn.use_back()); Instruction *Call = CS.getInstruction(); // Pass all the same arguments. - Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs); + Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs); // Drop any attributes that were on the vararg arguments. AttrListPtr PAL = CS.getAttributes(); @@ -250,8 +250,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } - if (MDNode *N = Call->getDbgMetadata()) - New->setDbgMetadata(N); + New->setDebugLoc(Call->getDebugLoc()); Args.clear(); @@ -725,7 +724,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // std::vector<Value*> Args; while (!F->use_empty()) { - CallSite CS = CallSite::get(F->use_back()); + CallSite CS(F->use_back()); Instruction *Call = CS.getInstruction(); AttributesVec.clear(); @@ -780,8 +779,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } - if (MDNode *N = Call->getDbgMetadata()) - New->setDbgMetadata(N); + New->setDebugLoc(Call->getDebugLoc()); Args.clear(); diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp index 662fbb5cd4130..5dc50c5bef32f 100644 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -26,7 +26,7 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab"); namespace { struct DTE : public ModulePass { static char ID; // Pass identification, replacement for typeid - DTE() : ModulePass(&ID) {} + DTE() : ModulePass(ID) {} // doPassInitialization - For this pass, it removes global symbol table // entries for primitive types. 
These are never used for linking in GCC and @@ -45,7 +45,7 @@ namespace { } char DTE::ID = 0; -static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination"); +INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false); ModulePass *llvm::createDeadTypeEliminationPass() { return new DTE(); diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 7f67e48ade839..45c5fe76ba7c5 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -17,15 +17,15 @@ #include "llvm/Pass.h" #include "llvm/Constants.h" #include "llvm/Transforms/IPO.h" +#include "llvm/ADT/SetVector.h" #include <algorithm> using namespace llvm; namespace { /// @brief A pass to extract specific functions and their dependencies. class GVExtractorPass : public ModulePass { - std::vector<GlobalValue*> Named; + SetVector<GlobalValue *> Named; bool deleteStuff; - bool reLink; public: static char ID; // Pass identification, replacement for typeid @@ -33,135 +33,42 @@ namespace { /// specified function. Otherwise, it deletes as much of the module as /// possible, except for the function specified. /// - explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true, - bool relinkCallees = false) - : ModulePass(&ID), Named(GVs), deleteStuff(deleteS), - reLink(relinkCallees) {} + explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true) + : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} bool runOnModule(Module &M) { - if (Named.size() == 0) { - return false; // Nothing to extract - } - - - if (deleteStuff) - return deleteGV(); - M.setModuleInlineAsm(""); - return isolateGV(M); - } - - bool deleteGV() { - for (std::vector<GlobalValue*>::iterator GI = Named.begin(), - GE = Named.end(); GI != GE; ++GI) { - if (Function* NamedFunc = dyn_cast<Function>(*GI)) { - // If we're in relinking mode, set linkage of all internal callees to - // external. This will allow us extract function, and then - link - // everything together - if (reLink) { - for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end(); - B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), E = B->end(); - I != E; ++I) { - if (CallInst* callInst = dyn_cast<CallInst>(&*I)) { - Function* Callee = callInst->getCalledFunction(); - if (Callee && Callee->hasLocalLinkage()) - Callee->setLinkage(GlobalValue::ExternalLinkage); - } - } - } - } - - NamedFunc->setLinkage(GlobalValue::ExternalLinkage); - NamedFunc->deleteBody(); - assert(NamedFunc->isDeclaration() && "This didn't make the function external!"); - } else { - if (!(*GI)->isDeclaration()) { - cast<GlobalVariable>(*GI)->setInitializer(0); //clear the initializer - (*GI)->setLinkage(GlobalValue::ExternalLinkage); - } - } - } - return true; - } - - bool isolateGV(Module &M) { - // Mark all globals internal - // FIXME: what should we do with private linkage? - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) + // Visit the global inline asm. + if (!deleteStuff) + M.setModuleInlineAsm(""); + + // For simplicity, just give all GlobalValues ExternalLinkage. A trickier + // implementation could figure out which GlobalValues are actually + // referenced by the Named set, and which GlobalValues in the rest of + // the module are referenced by the NamedSet, and get away with leaving + // more internal and private things internal and private. But for now, + // be conservative and simple. + + // Visit the GlobalVariables. 
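// (In the loops below, the test 'deleteStuff == Named.count(I)' is true for
//  exactly the values whose bodies must be dropped: the named set when
//  deleting, and everything outside it when extracting.)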
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) if (!I->isDeclaration()) { - I->setLinkage(GlobalValue::InternalLinkage); + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); + if (deleteStuff == Named.count(I)) + I->setInitializer(0); } + + // Visit the Functions. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration()) { - I->setLinkage(GlobalValue::InternalLinkage); - } - - // Make sure our result is globally accessible... - // by putting them in the used array - { - std::vector<Constant *> AUGs; - const Type *SBP= - Type::getInt8PtrTy(M.getContext()); - for (std::vector<GlobalValue*>::iterator GI = Named.begin(), - GE = Named.end(); GI != GE; ++GI) { - (*GI)->setLinkage(GlobalValue::ExternalLinkage); - AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP)); - } - ArrayType *AT = ArrayType::get(SBP, AUGs.size()); - Constant *Init = ConstantArray::get(AT, AUGs); - GlobalValue *gv = new GlobalVariable(M, AT, false, - GlobalValue::AppendingLinkage, - Init, "llvm.used"); - gv->setSection("llvm.metadata"); - } - - // All of the functions may be used by global variables or the named - // globals. Loop through them and create a new, external functions that - // can be "used", instead of ones with bodies. - std::vector<Function*> NewFunctions; - - Function *Last = --M.end(); // Figure out where the last real fn is. - - for (Module::iterator I = M.begin(); ; ++I) { - if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { - Function *New = Function::Create(I->getFunctionType(), - GlobalValue::ExternalLinkage); - New->copyAttributesFrom(I); - - // If it's not the named function, delete the body of the function - I->dropAllReferences(); - - M.getFunctionList().push_back(New); - NewFunctions.push_back(New); - New->takeName(I); + if (I->hasLocalLinkage()) + I->setVisibility(GlobalValue::HiddenVisibility); + I->setLinkage(GlobalValue::ExternalLinkage); + if (deleteStuff == Named.count(I)) + I->deleteBody(); } - if (&*I == Last) break; // Stop after processing the last function - } - - // Now that we have replacements all set up, loop through the module, - // deleting the old functions, replacing them with the newly created - // functions. - if (!NewFunctions.empty()) { - unsigned FuncNum = 0; - Module::iterator I = M.begin(); - do { - if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) { - // Make everything that uses the old function use the new dummy fn - I->replaceAllUsesWith(NewFunctions[FuncNum++]); - - Function *Old = I; - ++I; // Move the iterator to the new function - - // Delete the old function! 
- M.getFunctionList().erase(Old); - - } else { - ++I; // Skip the function we are extracting - } - } while (&*I != NewFunctions[0]); - } - return true; } }; @@ -170,6 +77,6 @@ namespace { } ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs, - bool deleteFn, bool relinkCallees) { - return new GVExtractorPass(GVs, deleteFn, relinkCallees); + bool deleteFn) { + return new GVExtractorPass(GVs, deleteFn); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 9bd7af61c531f..6165ba023f737 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -41,7 +41,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias"); namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(&ID) {} + FunctionAttrs() : CallGraphSCCPass(ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. bool runOnSCC(CallGraphSCC &SCC); @@ -69,8 +69,8 @@ namespace { } char FunctionAttrs::ID = 0; -static RegisterPass<FunctionAttrs> -X("functionattrs", "Deduce function attributes"); +INITIALIZE_PASS(FunctionAttrs, "functionattrs", + "Deduce function attributes", false, false); Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } @@ -162,14 +162,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && CS.getCalledFunction()) { + CallSite CS(cast<Value>(I)); + if (CS && CS.getCalledFunction()) { // Ignore calls to functions in the same SCC. if (SCCNodes.count(CS.getCalledFunction())) continue; // Ignore intrinsics that only access local memory. if (unsigned id = CS.getCalledFunction()->getIntrinsicID()) - if (AliasAnalysis::getModRefBehavior(id) == + if (AliasAnalysis::getIntrinsicModRefBehavior(id) == AliasAnalysis::AccessesArguments) { // Check that all pointer arguments point to local memory. for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 44216a6df99c4..aa18601b9aeca 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -31,7 +31,7 @@ STATISTIC(NumVariables, "Number of global variables removed"); namespace { struct GlobalDCE : public ModulePass { static char ID; // Pass identification, replacement for typeid - GlobalDCE() : ModulePass(&ID) {} + GlobalDCE() : ModulePass(ID) {} // run - Do the GlobalDCE pass on the specified module, optionally updating // the specified callgraph to reflect the changes. 
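A change repeated across the passes in this commit: constructors now hand the pass ID to the base class by reference (ModulePass(ID) instead of ModulePass(&ID)), and the static RegisterPass<> object becomes an INITIALIZE_PASS macro invocation. A minimal sketch of the new shape, with a hypothetical pass name:

namespace {
  struct DemoPass : public FunctionPass {
    static char ID;  // the *address* of ID identifies the pass
    DemoPass() : FunctionPass(ID) {}
    virtual bool runOnFunction(Function &F) { return false; }
  };
}

char DemoPass::ID = 0;
INITIALIZE_PASS(DemoPass, "demopass", "Demonstration Pass",
                false /*Only looks at CFG*/, false /*Analysis Pass*/);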
@@ -51,7 +51,8 @@ namespace { } char GlobalDCE::ID = 0; -static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination"); +INITIALIZE_PASS(GlobalDCE, "globaldce", + "Dead Global Elimination", false, false); ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 735a1c47c39b5..a77af549caa13 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -59,7 +59,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - GlobalOpt() : ModulePass(&ID) {} + GlobalOpt() : ModulePass(ID) {} bool runOnModule(Module &M); @@ -74,7 +74,8 @@ namespace { } char GlobalOpt::ID = 0; -static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer"); +INITIALIZE_PASS(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false); ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -1467,7 +1468,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, TargetData *TD) { if (!TD) return false; - + // If this is a malloc of an abstract type, don't touch it. if (!AllocTy->isSized()) return false; @@ -2077,7 +2078,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; // The first index must be zero. - ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin())); + ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin())); if (!CI || !CI->isZero()) return false; // The remaining indices must be compile-time known integers within the @@ -2302,7 +2303,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (isa<InlineAsm>(CI->getCalledValue())) return false; // Resolve function pointers. - Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue())); + Function *Callee = dyn_cast<Function>(getVal(Values, + CI->getCalledValue())); if (!Callee) return false; // Cannot resolve. SmallVector<Constant*, 8> Formals; diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index e4db235b1d108..1b3cf7810cc68 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -35,7 +35,7 @@ namespace { /// struct IPCP : public ModulePass { static char ID; // Pass identification, replacement for typeid - IPCP() : ModulePass(&ID) {} + IPCP() : ModulePass(ID) {} bool runOnModule(Module &M); private: @@ -45,8 +45,8 @@ namespace { } char IPCP::ID = 0; -static RegisterPass<IPCP> -X("ipconstprop", "Interprocedural constant propagation"); +INITIALIZE_PASS(IPCP, "ipconstprop", + "Interprocedural constant propagation", false, false); ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); } @@ -94,7 +94,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { if (!isa<CallInst>(U) && !isa<InvokeInst>(U)) return false; - CallSite CS = CallSite::get(cast<Instruction>(U)); + CallSite CS(cast<Instruction>(U)); if (!CS.isCallee(UI)) return false; @@ -219,7 +219,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { // constant. 
bool MadeChange = false; for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { - CallSite CS = CallSite::get(*UI); + CallSite CS(*UI); Instruction* Call = CS.getInstruction(); // Not a call instruction or a call instruction that's not calling F diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 8e312e7d91855..ecc60ad069325 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -36,7 +36,7 @@ namespace { InlineCostAnalyzer CA; public: // Use extremely low threshold. - AlwaysInliner() : Inliner(&ID, -2000000000) {} + AlwaysInliner() : Inliner(ID, -2000000000) {} static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -61,8 +61,8 @@ namespace { } char AlwaysInliner::ID = 0; -static RegisterPass<AlwaysInliner> -X("always-inline", "Inliner for always_inline functions"); +INITIALIZE_PASS(AlwaysInliner, "always-inline", + "Inliner for always_inline functions", false, false); Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 74b4a1c10ece2..9c6637dfe5ad6 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -33,8 +33,8 @@ namespace { SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: - SimpleInliner() : Inliner(&ID) {} - SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {} + SimpleInliner() : Inliner(ID) {} + SimpleInliner(int Threshold) : Inliner(ID, Threshold) {} static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { return CA.getInlineCost(CS, NeverInline); @@ -56,8 +56,8 @@ namespace { } char SimpleInliner::ID = 0; -static RegisterPass<SimpleInliner> -X("inline", "Function Integration/Inlining"); +INITIALIZE_PASS(SimpleInliner, "inline", + "Function Integration/Inlining", false, false); Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 9bb01f5699fe3..4983e8e13a3ee 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -48,10 +48,10 @@ HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; -Inliner::Inliner(void *ID) +Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {} -Inliner::Inliner(void *ID, int Threshold) +Inliner::Inliner(char &ID, int Threshold) : CallGraphSCCPass(ID), InlineThreshold(Threshold) {} /// getAnalysisUsage - For this class, we declare that we require and preserve @@ -238,11 +238,11 @@ bool Inliner::shouldInline(CallSite CS) { bool someOuterCallWouldNotBeInlined = false; for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); I != E; ++I) { - CallSite CS2 = CallSite::get(*I); + CallSite CS2(*I); // If this isn't a call to Caller (it could be some other sort // of reference) skip it. 
- if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller) + if (!CS2 || CS2.getCalledFunction() != Caller) continue; InlineCost IC2 = getInlineCost(CS2); @@ -334,10 +334,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - CallSite CS = CallSite::get(I); + CallSite CS(cast<Value>(I)); // If this isn't a call, or it is a call to an intrinsic, it can // never be inlined. - if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I)) + if (!CS || isa<IntrinsicInst>(I)) continue; // If this is a direct call to an external function, we can never inline diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 47abb7dfd8121..a1d919fd8a042 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -63,11 +63,11 @@ namespace { } // end anonymous namespace char InternalizePass::ID = 0; -static RegisterPass<InternalizePass> -X("internalize", "Internalize Global Symbols"); +INITIALIZE_PASS(InternalizePass, "internalize", + "Internalize Global Symbols", false, false); InternalizePass::InternalizePass(bool AllButMain) - : ModulePass(&ID), AllButMain(AllButMain){ + : ModulePass(ID), AllButMain(AllButMain){ if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); if (!APIList.empty()) // If a list is specified, use it as well. @@ -75,7 +75,7 @@ InternalizePass::InternalizePass(bool AllButMain) } InternalizePass::InternalizePass(const std::vector<const char *>&exportList) - : ModulePass(&ID), AllButMain(false){ + : ModulePass(ID), AllButMain(false){ for(std::vector<const char *>::const_iterator itr = exportList.begin(); itr != exportList.end(); itr++) { ExternalNames.insert(*itr); diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index cb813303facb9..f88dff67d7c98 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -37,7 +37,7 @@ namespace { unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : LoopPass(&ID), NumLoops(numLoops) {} + : LoopPass(ID), NumLoops(numLoops) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -50,8 +50,8 @@ namespace { } char LoopExtractor::ID = 0; -static RegisterPass<LoopExtractor> -X("loop-extract", "Extract loops into new functions"); +INITIALIZE_PASS(LoopExtractor, "loop-extract", + "Extract loops into new functions", false, false); namespace { /// SingleLoopExtractor - For bugpoint. @@ -62,8 +62,8 @@ namespace { } // End anonymous namespace char SingleLoopExtractor::ID = 0; -static RegisterPass<SingleLoopExtractor> -Y("loop-extract-single", "Extract at most one loop into a new function"); +INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", + "Extract at most one loop into a new function", false, false); // createLoopExtractorPass - This pass extracts all natural loops from the // program into a function if it can. 
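Several of the preceding hunks also retire CallSite::get(V) in favor of the CallSite constructor, whose operator bool subsumes the old getInstruction() == 0 check. A sketch of the new idiom for scanning a basic block (BB here is assumed to be a BasicBlock*):

for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
  CallSite CS(cast<Value>(I));       // null CallSite unless I is a call/invoke
  if (!CS || isa<IntrinsicInst>(I))  // operator bool tests the instruction
    continue;
  // ... inspect CS.getCalledFunction(), CS.arg_begin(), CS.arg_end(), ...
}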
@@ -147,27 +147,26 @@ namespace { std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName; public: static char ID; // Pass identification, replacement for typeid - explicit BlockExtractorPass(const std::vector<BasicBlock*> &B) - : ModulePass(&ID), BlocksToNotExtract(B) { + BlockExtractorPass() : ModulePass(ID) { if (!BlockFile.empty()) LoadFile(BlockFile.c_str()); } - BlockExtractorPass() : ModulePass(&ID) {} bool runOnModule(Module &M); }; } char BlockExtractorPass::ID = 0; -static RegisterPass<BlockExtractorPass> -XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)"); +INITIALIZE_PASS(BlockExtractorPass, "extract-blocks", + "Extract Basic Blocks From Module (for bugpoint use)", + false, false); // createBlockExtractorPass - This pass extracts all blocks (except those // specified in the argument list) from the functions in the module. // -ModulePass *llvm::createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE) +ModulePass *llvm::createBlockExtractorPass() { - return new BlockExtractorPass(BTNE); + return new BlockExtractorPass(); } void BlockExtractorPass::LoadFile(const char *Filename) { diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index 76cfef8335c98..6c715de04b763 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -109,7 +109,7 @@ namespace { bool IsTransformableFunction(StringRef Name); public: static char ID; // Pass identification, replacement for typeid - LowerSetJmp() : ModulePass(&ID) {} + LowerSetJmp() : ModulePass(ID) {} void visitCallInst(CallInst& CI); void visitInvokeInst(InvokeInst& II); @@ -122,7 +122,7 @@ namespace { } // end anonymous namespace char LowerSetJmp::ID = 0; -static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump"); +INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false); // run - Run the transformation on the program. We grab the function // prototypes for longjmp and setjmp. If they are used in the program, diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index aeeafe7fd19dc..5d838f98aa082 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -29,44 +29,27 @@ // // Many functions have their address taken by the virtual function table for // the object they belong to. However, as long as it's only used for a lookup -// and call, this is irrelevant, and we'd like to fold such implementations. +// and call, this is irrelevant, and we'd like to fold such functions. // -// * use SCC to cut down on pair-wise comparisons and solve larger cycles. +// * switch from n^2 pair-wise comparisons to an n-way comparison for each +// bucket. // -// The current implementation loops over a pair-wise comparison of all -// functions in the program where the two functions in the pair are treated as -// assumed to be equal until proven otherwise. We could both use fewer -// comparisons and optimize more complex cases if we used strongly connected -// components of the call graph. -// -// * be smarter about bitcast. +// * be smarter about bitcasts. // // In order to fold functions, we will sometimes add either bitcast instructions // or bitcast constant expressions. Unfortunately, this can confound further // analysis since the two functions differ where one has a bitcast and the -// other doesn't. We should learn to peer through bitcasts without imposing bad -// performance properties. -// -// * don't emit aliases for Mach-O. 
-// -// Mach-O doesn't support aliases which means that we must avoid introducing -// them in the bitcode on architectures which don't support them, such as -// Mac OSX. There's a few approaches to this problem; -// a) teach codegen to lower global aliases to thunks on platforms which don't -// support them. -// b) always emit thunks, and create a separate thunk-to-alias pass which -// runs on ELF systems. This has the added benefit of transforming other -// thunks such as those produced by a C++ frontend into aliases when legal -// to do so. +// other doesn't. We should learn to look through bitcasts. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -76,68 +59,103 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" -#include <map> #include <vector> using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { + /// MergeFunctions finds functions which will generate identical machine code, + /// by considering all pointer types to be equivalent. Once identified, + /// MergeFunctions will fold them by replacing a call to one with a call to a + /// bitcast of the other. + /// class MergeFunctions : public ModulePass { public: - static char ID; // Pass identification, replacement for typeid - MergeFunctions() : ModulePass(&ID) {} + static char ID; + MergeFunctions() : ModulePass(ID) {} bool runOnModule(Module &M); private: - bool isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2); - - bool equals(const BasicBlock *BB1, const BasicBlock *BB2); - bool equals(const Function *F, const Function *G); + /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G + /// may be deleted, or may be converted into a thunk. In either case, it + /// should never be visited again. + void MergeTwoFunctions(Function *F, Function *G) const; - bool compare(const Value *V1, const Value *V2); + /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also + /// replace direct uses of G with bitcast(F). + void WriteThunk(Function *F, Function *G) const; - const Function *LHS, *RHS; - typedef DenseMap<const Value *, unsigned long> IDMap; - IDMap Map; - DenseMap<const Function *, IDMap> Domains; - DenseMap<const Function *, unsigned long> DomainCount; TargetData *TD; }; } char MergeFunctions::ID = 0; -static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions"); +INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false); ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } -// ===----------------------------------------------------------------------=== -// Comparison of functions -// ===----------------------------------------------------------------------=== +namespace { +/// FunctionComparator - Compares two functions to determine whether or not +/// they will generate machine code with the same behaviour. TargetData is +/// used if available.
The comparator always fails conservatively (erring on the +/// side of claiming that two functions are different). +class FunctionComparator { +public: + FunctionComparator(const TargetData *TD, const Function *F1, + const Function *F2) + : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {} + + /// Compare - test whether the two functions have equivalent behaviour. + bool Compare(); + +private: + /// Compare - test whether two basic blocks have equivalent behaviour. + bool Compare(const BasicBlock *BB1, const BasicBlock *BB2); + + /// Enumerate - Assign or look up previously assigned numbers for the two + /// values, and return whether the numbers are equal. Numbers are assigned in + /// the order visited. + bool Enumerate(const Value *V1, const Value *V2); + + /// isEquivalentOperation - Compare two Instructions for equivalence, similar + /// to Instruction::isSameOperationAs but with modifications to the type + /// comparison. + bool isEquivalentOperation(const Instruction *I1, + const Instruction *I2) const; + + /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic. + bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); + } -static unsigned long hash(const Function *F) { - const FunctionType *FTy = F->getFunctionType(); + /// isEquivalentType - Compare two Types, treating all pointer types as equal. + bool isEquivalentType(const Type *Ty1, const Type *Ty2) const; - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(FTy->getReturnType()->getTypeID()); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(FTy->getParamType(i)->getTypeID()); - return ID.ComputeHash(); + // The two functions undergoing comparison. + const Function *F1, *F2; + + const TargetData *TD; + + typedef DenseMap<const Value *, unsigned long> IDMap; + IDMap Map1, Map2; + unsigned long IDMap1Count, IDMap2Count; +}; } -/// isEquivalentType - any two pointers are equivalent. Otherwise, standard -/// type equivalence rules apply. -static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { +/// isEquivalentType - any two pointers in the same address space are +/// equivalent. Otherwise, standard type equivalence rules apply. +bool FunctionComparator::isEquivalentType(const Type *Ty1, + const Type *Ty2) const { if (Ty1 == Ty2) return true; if (Ty1->getTypeID() != Ty2->getTypeID()) @@ -184,21 +202,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return true; } - case Type::UnionTyID: { - const UnionType *UTy1 = cast<UnionType>(Ty1); - const UnionType *UTy2 = cast<UnionType>(Ty2); - - // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc. 
- if (UTy1->getNumElements() != UTy2->getNumElements()) - return false; - - for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { - if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) - return false; - } - return true; - } - case Type::FunctionTyID: { const FunctionType *FTy1 = cast<FunctionType>(Ty1); const FunctionType *FTy2 = cast<FunctionType>(Ty2); @@ -216,11 +219,18 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return true; } - case Type::ArrayTyID: + case Type::ArrayTyID: { + const ArrayType *ATy1 = cast<ArrayType>(Ty1); + const ArrayType *ATy2 = cast<ArrayType>(Ty2); + return ATy1->getNumElements() == ATy2->getNumElements() && + isEquivalentType(ATy1->getElementType(), ATy2->getElementType()); + } + case Type::VectorTyID: { - const SequentialType *STy1 = cast<SequentialType>(Ty1); - const SequentialType *STy2 = cast<SequentialType>(Ty2); - return isEquivalentType(STy1->getElementType(), STy2->getElementType()); + const VectorType *VTy1 = cast<VectorType>(Ty1); + const VectorType *VTy2 = cast<VectorType>(Ty2); + return VTy1->getNumElements() == VTy2->getNumElements() && + isEquivalentType(VTy1->getElementType(), VTy2->getElementType()); } } } @@ -228,8 +238,8 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { /// isEquivalentOperation - determine whether the two operations are the same /// except that pointer-to-A and pointer-to-B are equivalent. This should be /// kept in sync with Instruction::isSameOperationAs. -static bool -isEquivalentOperation(const Instruction *I1, const Instruction *I2) { +bool FunctionComparator::isEquivalentOperation(const Instruction *I1, + const Instruction *I2) const { if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || !isEquivalentType(I1->getType(), I2->getType()) || @@ -281,18 +291,15 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) { return true; } -bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2) { +/// isEquivalentGEP - determine whether two GEP operations perform the same +/// underlying arithmetic. +bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, + const GEPOperator *GEP2) { + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { - SmallVector<Value *, 8> Indices1, Indices2; - for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(), - E = GEP1->idx_end(); I != E; ++I) { - Indices1.push_back(*I); - } - for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(), - E = GEP2->idx_end(); I != E; ++I) { - Indices2.push_back(*I); - } + SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end()); + SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end()); uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), Indices1.data(), Indices1.size()); uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), @@ -300,7 +307,6 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, return Offset1 == Offset2; } - // Equivalent types aren't enough. 
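When TargetData is available, the byte-offset reduction above makes two GEPs with different index lists compare equal whenever they add the same number of bytes to the base pointer. A toy stand-alone illustration of the idea, with no LLVM dependency and assuming the usual 4-byte i32 with no struct padding:

    #include <cassert>
    #include <cstdint>

    // "gep {i32,i32}* %p, 0, 1" and "gep [2 x i32]* %p, 0, 1" both denote
    // base + 4 bytes under this layout, so the comparator treats them as the
    // same arithmetic even though the index paths differ.
    int main() {
      struct Pair { int32_t a, b; };
      Pair P = {0, 0};
      int32_t A[2] = {0, 0};
      uintptr_t StructOff = reinterpret_cast<uintptr_t>(&P.b) -
                            reinterpret_cast<uintptr_t>(&P.a);
      uintptr_t ArrayOff  = reinterpret_cast<uintptr_t>(&A[1]) -
                            reinterpret_cast<uintptr_t>(&A[0]);
      assert(StructOff == 4 && ArrayOff == 4);
      return 0;
    }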
if (GEP1->getPointerOperand()->getType() != GEP2->getPointerOperand()->getType()) return false; @@ -309,19 +315,26 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, return false; for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!compare(GEP1->getOperand(i), GEP2->getOperand(i))) + if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) return false; } return true; } -bool MergeFunctions::compare(const Value *V1, const Value *V2) { - if (V1 == LHS || V1 == RHS) - if (V2 == LHS || V2 == RHS) - return true; +/// Enumerate - Compare two values used by the two functions under pair-wise +/// comparison. If this is the first time the values are seen, they're added to +/// the mapping so that we will detect mismatches on next use. +bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) { + // Check for function @f1 referring to itself and function @f2 referring to + // itself, or referring to each other, or both referring to either of them. + // They're all equivalent if the two functions are otherwise equivalent. + if (V1 == F1 && V2 == F2) + return true; + if (V1 == F2 && V2 == F1) + return true; - // TODO: constant expressions in terms of LHS and RHS + // TODO: constant expressions with GEP or references to F1 or F2. if (isa<Constant>(V1)) return V1 == V2; @@ -332,228 +345,138 @@ bool MergeFunctions::compare(const Value *V1, const Value *V2) { IA1->getConstraintString() == IA2->getConstraintString(); } - // We enumerate constants globally and arguments, basic blocks or - // instructions within the function they belong to. - const Function *Domain1 = NULL; - if (const Argument *A = dyn_cast<Argument>(V1)) { - Domain1 = A->getParent(); - } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) { - Domain1 = BB->getParent(); - } else if (const Instruction *I = dyn_cast<Instruction>(V1)) { - Domain1 = I->getParent()->getParent(); - } - - const Function *Domain2 = NULL; - if (const Argument *A = dyn_cast<Argument>(V2)) { - Domain2 = A->getParent(); - } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) { - Domain2 = BB->getParent(); - } else if (const Instruction *I = dyn_cast<Instruction>(V2)) { - Domain2 = I->getParent()->getParent(); - } - - if (Domain1 != Domain2) - if (Domain1 != LHS && Domain1 != RHS) - if (Domain2 != LHS && Domain2 != RHS) - return false; - - IDMap &Map1 = Domains[Domain1]; unsigned long &ID1 = Map1[V1]; if (!ID1) - ID1 = ++DomainCount[Domain1]; + ID1 = ++IDMap1Count; - IDMap &Map2 = Domains[Domain2]; unsigned long &ID2 = Map2[V2]; if (!ID2) - ID2 = ++DomainCount[Domain2]; + ID2 = ++IDMap2Count; return ID1 == ID2; } -bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) { - BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); - BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); +/// Compare - test whether two basic blocks have equivalent behaviour. 
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) { + BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); + BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); do { - if (!compare(FI, GI)) + if (!Enumerate(F1I, F2I)) return false; - if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) { - const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI); - const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI); + if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { + const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I); + if (!GEP2) + return false; - if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; if (!isEquivalentGEP(GEP1, GEP2)) return false; } else { - if (!isEquivalentOperation(FI, GI)) + if (!isEquivalentOperation(F1I, F2I)) return false; - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = FI->getOperand(i); - Value *OpG = GI->getOperand(i); + assert(F1I->getNumOperands() == F2I->getNumOperands()); + for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) { + Value *OpF1 = F1I->getOperand(i); + Value *OpF2 = F2I->getOperand(i); - if (!compare(OpF, OpG)) + if (!Enumerate(OpF1, OpF2)) return false; - if (OpF->getValueID() != OpG->getValueID() || - !isEquivalentType(OpF->getType(), OpG->getType())) + if (OpF1->getValueID() != OpF2->getValueID() || + !isEquivalentType(OpF1->getType(), OpF2->getType())) return false; } } - ++FI, ++GI; - } while (FI != FE && GI != GE); + ++F1I, ++F2I; + } while (F1I != F1E && F2I != F2E); - return FI == FE && GI == GE; + return F1I == F1E && F2I == F2E; } -bool MergeFunctions::equals(const Function *F, const Function *G) { +/// Compare - test whether the two functions have equivalent behaviour. +bool FunctionComparator::Compare() { // We need to recheck everything, but check the things that weren't included // in the hash first. - if (F->getAttributes() != G->getAttributes()) + if (F1->getAttributes() != F2->getAttributes()) return false; - if (F->hasGC() != G->hasGC()) + if (F1->hasGC() != F2->hasGC()) return false; - if (F->hasGC() && F->getGC() != G->getGC()) + if (F1->hasGC() && F1->getGC() != F2->getGC()) return false; - if (F->hasSection() != G->hasSection()) + if (F1->hasSection() != F2->hasSection()) return false; - if (F->hasSection() && F->getSection() != G->getSection()) + if (F1->hasSection() && F1->getSection() != F2->getSection()) return false; - if (F->isVarArg() != G->isVarArg()) + if (F1->isVarArg() != F2->isVarArg()) return false; // TODO: if it's internal and only used in direct calls, we could handle this // case too. - if (F->getCallingConv() != G->getCallingConv()) + if (F1->getCallingConv() != F2->getCallingConv()) return false; - if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) + if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType())) return false; - assert(F->arg_size() == G->arg_size() && + assert(F1->arg_size() == F2->arg_size() && "Identical functions have a different number of args."); - LHS = F; - RHS = G; - // Visit the arguments so that they get enumerated in the order they're // passed in. 
- for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), - fe = F->arg_end(); fi != fe; ++fi, ++gi) { - if (!compare(fi, gi)) + for (Function::const_arg_iterator f1i = F1->arg_begin(), + f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { + if (!Enumerate(f1i, f2i)) llvm_unreachable("Arguments repeat"); } - SmallVector<const BasicBlock *, 8> FBBs, GBBs; - SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F. - FBBs.push_back(&F->getEntryBlock()); - GBBs.push_back(&G->getEntryBlock()); - VisitedBBs.insert(FBBs[0]); - while (!FBBs.empty()) { - const BasicBlock *FBB = FBBs.pop_back_val(); - const BasicBlock *GBB = GBBs.pop_back_val(); - if (!compare(FBB, GBB) || !equals(FBB, GBB)) { - Domains.clear(); - DomainCount.clear(); - return false; - } - const TerminatorInst *FTI = FBB->getTerminator(); - const TerminatorInst *GTI = GBB->getTerminator(); - assert(FTI->getNumSuccessors() == GTI->getNumSuccessors()); - for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(FTI->getSuccessor(i))) - continue; - FBBs.push_back(FTI->getSuccessor(i)); - GBBs.push_back(GTI->getSuccessor(i)); - } - } + // We do a CFG-ordered walk since the actual ordering of the blocks in the + // linked list is immaterial. Our walk starts at the entry block for both + // functions, then takes each block from each terminator in order. As an + // artifact, this also means that unreachable blocks are ignored. + SmallVector<const BasicBlock *, 8> F1BBs, F2BBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. - Domains.clear(); - DomainCount.clear(); - return true; -} + F1BBs.push_back(&F1->getEntryBlock()); + F2BBs.push_back(&F2->getEntryBlock()); -// ===----------------------------------------------------------------------=== -// Folding of functions -// ===----------------------------------------------------------------------=== - -// Cases: -// * F is external strong, G is external strong: -// turn G into a thunk to F (1) -// * F is external strong, G is external weak: -// turn G into a thunk to F (1) -// * F is external weak, G is external weak: -// unfoldable -// * F is external strong, G is internal: -// address of G taken: -// turn G into a thunk to F (1) -// address of G not taken: -// make G an alias to F (2) -// * F is internal, G is external weak -// address of F is taken: -// turn G into a thunk to F (1) -// address of F is not taken: -// make G an alias of F (2) -// * F is internal, G is internal: -// address of F and G are taken: -// turn G into a thunk to F (1) -// address of G is not taken: -// make G an alias to F (2) -// -// alias requires linkage == (external,local,weak) fallback to creating a thunk -// external means 'externally visible' linkage != (internal,private) -// internal means linkage == (internal,private) -// weak means linkage mayBeOverridable -// being external implies that the address is taken -// -// 1. turn G into a thunk to F -// 2. 
make G an alias to F + VisitedBBs.insert(F1BBs[0]); + while (!F1BBs.empty()) { + const BasicBlock *F1BB = F1BBs.pop_back_val(); + const BasicBlock *F2BB = F2BBs.pop_back_val(); -enum LinkageCategory { - ExternalStrong, - ExternalWeak, - Internal -}; + if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB)) + return false; -static LinkageCategory categorize(const Function *F) { - switch (F->getLinkage()) { - case GlobalValue::InternalLinkage: - case GlobalValue::PrivateLinkage: - case GlobalValue::LinkerPrivateLinkage: - return Internal; - - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - case GlobalValue::ExternalWeakLinkage: - case GlobalValue::LinkerPrivateWeakLinkage: - return ExternalWeak; - - case GlobalValue::ExternalLinkage: - case GlobalValue::AvailableExternallyLinkage: - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::AppendingLinkage: - case GlobalValue::DLLImportLinkage: - case GlobalValue::DLLExportLinkage: - case GlobalValue::CommonLinkage: - return ExternalStrong; - } + const TerminatorInst *F1TI = F1BB->getTerminator(); + const TerminatorInst *F2TI = F2BB->getTerminator(); - llvm_unreachable("Unknown LinkageType."); - return ExternalWeak; + assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); + for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(F1TI->getSuccessor(i))) + continue; + + F1BBs.push_back(F1TI->getSuccessor(i)); + F2BBs.push_back(F2TI->getSuccessor(i)); + } + } + return true; } -static void ThunkGToF(Function *F, Function *G) { +/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace +/// direct uses of G with bitcast(F). +void MergeFunctions::WriteThunk(Function *F, Function *G) const { if (!G->mayBeOverridden()) { // Redirect direct callers of G to F. Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); @@ -567,33 +490,34 @@ static void ThunkGToF(Function *F, Function *G) { } } + // If G was internal then we may have replaced all uses of G with F. If so, + // stop here and delete G. There's no need for a thunk.
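The thunk WriteThunk produces is easiest to see in isolation. The following stand-alone sketch builds the same shape against the 2.8-era C++ API; the function names f and g are invented, and error handling is elided:

    #include <vector>
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Function.h"
    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("thunk-demo", Ctx);
      std::vector<const Type*> Params(1, Type::getInt32Ty(Ctx));
      const FunctionType *FTy =
        FunctionType::get(Type::getInt32Ty(Ctx), Params, false);
      Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
      Function *G = Function::Create(FTy, Function::ExternalLinkage, "g", &M);

      // Body of the thunk: forward the argument, tail-call F, return the result.
      BasicBlock *BB = BasicBlock::Create(Ctx, "", G);
      IRBuilder<false> Builder(BB);            // same builder setup as WriteThunk
      CallInst *CI = Builder.CreateCall(F, &*G->arg_begin());
      CI->setTailCall();
      CI->setCallingConv(F->getCallingConv());
      Builder.CreateRet(CI);

      M.dump();   // @g becomes: tail call i32 @f(i32 %arg); ret
      return 0;
    }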
+ if (G->hasLocalLinkage() && G->use_empty()) { + G->eraseFromParent(); + return; + } + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); + IRBuilder<false> Builder(BB); SmallVector<Value *, 16> Args; unsigned i = 0; const FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - if (FFTy->getParamType(i) == AI->getType()) { - Args.push_back(AI); - } else { - Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB)); - } + Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i))); ++i; } - CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); + CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end()); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); if (NewG->getReturnType()->isVoidTy()) { - ReturnInst::Create(F->getContext(), BB); - } else if (CI->getType() != NewG->getReturnType()) { - Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); - ReturnInst::Create(F->getContext(), BCI, BB); + Builder.CreateRetVoid(); } else { - ReturnInst::Create(F->getContext(), CI, BB); + Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType())); } NewG->copyAttributesFrom(G); @@ -602,152 +526,126 @@ static void ThunkGToF(Function *F, Function *G) { G->eraseFromParent(); } -static void AliasGToF(Function *F, Function *G) { - // Darwin will trigger llvm_unreachable if asked to codegen an alias. - return ThunkGToF(F, G); - -#if 0 - if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage()) - return ThunkGToF(F, G); - - GlobalAlias *GA = new GlobalAlias( - G->getType(), G->getLinkage(), "", - ConstantExpr::getBitCast(F, G->getType()), G->getParent()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - GA->takeName(G); - GA->setVisibility(G->getVisibility()); - G->replaceAllUsesWith(GA); - G->eraseFromParent(); -#endif -} - -static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { - Function *F = FnVec[i]; - Function *G = FnVec[j]; - - LinkageCategory catF = categorize(F); - LinkageCategory catG = categorize(G); - - if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - std::swap(catF, catG); - } - - switch (catF) { - case ExternalStrong: - switch (catG) { - case ExternalStrong: - case ExternalWeak: - ThunkGToF(F, G); - break; - case Internal: - if (G->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - } - break; - - case ExternalWeak: { - assert(catG == ExternalWeak); +/// MergeTwoFunctions - Merge two equivalent functions. Upon completion, +/// Function G is deleted. +void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const { + if (F->isWeakForLinker()) { + assert(G->isWeakForLinker()); // Make them both thunks to the same internal function. 
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", F->getParent()); H->copyAttributesFrom(F); H->takeName(F); F->replaceAllUsesWith(H); - ThunkGToF(F, G); - ThunkGToF(F, H); + unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); - F->setLinkage(GlobalValue::InternalLinkage); - } break; - - case Internal: - switch (catG) { - case ExternalStrong: - llvm_unreachable(0); - // fall-through - case ExternalWeak: - if (F->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - case Internal: { - bool addrTakenF = F->hasAddressTaken(); - bool addrTakenG = G->hasAddressTaken(); - if (!addrTakenF && addrTakenG) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + WriteThunk(F, G); + WriteThunk(F, H); - if (addrTakenF && addrTakenG) { - ThunkGToF(F, G); - } else { - assert(!addrTakenG); - AliasGToF(F, G); - } - } break; - } break; + F->setAlignment(MaxAlignment); + F->setLinkage(GlobalValue::InternalLinkage); + } else { + WriteThunk(F, G); } ++NumFunctionsMerged; - return true; } -// ===----------------------------------------------------------------------=== -// Pass definition -// ===----------------------------------------------------------------------=== +static unsigned ProfileFunction(const Function *F) { + const FunctionType *FTy = F->getFunctionType(); -bool MergeFunctions::runOnModule(Module &M) { - bool Changed = false; + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); +} - std::map<unsigned long, std::vector<Function *> > FnMap; +class ComparableFunction { +public: + ComparableFunction(Function *Func, TargetData *TD) + : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {} - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) - continue; + AssertingVH<Function> const Func; + const unsigned Hash; + TargetData * const TD; +}; - FnMap[hash(F)].push_back(F); +struct MergeFunctionsEqualityInfo { + static ComparableFunction *getEmptyKey() { + return reinterpret_cast<ComparableFunction*>(0); + } + static ComparableFunction *getTombstoneKey() { + return reinterpret_cast<ComparableFunction*>(-1); } + static unsigned getHashValue(const ComparableFunction *CF) { + return CF->Hash; + } + static bool isEqual(const ComparableFunction *LHS, + const ComparableFunction *RHS) { + if (LHS == RHS) + return true; + if (LHS == getEmptyKey() || LHS == getTombstoneKey() || + RHS == getEmptyKey() || RHS == getTombstoneKey()) + return false; + assert(LHS->TD == RHS->TD && "Comparing functions for different targets"); + return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare(); + } +}; +bool MergeFunctions::runOnModule(Module &M) { + typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType; + + bool Changed = false; TD = getAnalysisIfAvailable<TargetData>(); + std::vector<Function *> Funcs; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage()) + Funcs.push_back(F); + } + bool LocalChanged; do { LocalChanged = false; - DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); - for (std::map<unsigned long, 
std::vector<Function *> >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { - std::vector<Function *> &FnVec = I->second; - DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); - - for (int i = 0, e = FnVec.size(); i != e; ++i) { - for (int j = i + 1; j != e; ++j) { - bool isEqual = equals(FnVec[i], FnVec[j]); - - DEBUG(dbgs() << " " << FnVec[i]->getName() - << (isEqual ? " == " : " != ") - << FnVec[j]->getName() << "\n"); - - if (isEqual) { - if (fold(FnVec, i, j)) { - LocalChanged = true; - FnVec.erase(FnVec.begin() + j); - --j, --e; - } - } - } - } + FnSetType FnSet; + for (unsigned i = 0, e = Funcs.size(); i != e;) { + Function *F = Funcs[i]; + ComparableFunction *NewF = new ComparableFunction(F, TD); + std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); + if (!Result.second) { + ComparableFunction *&OldF = *Result.first; + assert(OldF && "Expected a hash collision"); + + // NewF will be deleted in favour of OldF unless NewF is strong and + // OldF is weak in which case swap them to keep the strong definition. + + if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker()) + std::swap(OldF, NewF); + + DEBUG(dbgs() << " " << OldF->Func->getName() << " == " + << NewF->Func->getName() << '\n'); + + Funcs.erase(Funcs.begin() + i); + --e; + + Function *DeleteF = NewF->Func; + delete NewF; + MergeTwoFunctions(OldF->Func, DeleteF); + LocalChanged = true; + Changed = true; + } else { + ++i; + } } - Changed |= LocalChanged; + DeleteContainerPointers(FnSet); } while (LocalChanged); return Changed; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 6b9814ceb8769..432f7c53a67d1 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -30,7 +30,7 @@ namespace { struct PartialInliner : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { } static char ID; // Pass identification, replacement for typeid - PartialInliner() : ModulePass(&ID) {} + PartialInliner() : ModulePass(ID) {} bool runOnModule(Module& M); @@ -40,7 +40,8 @@ namespace { } char PartialInliner::ID = 0; -static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner"); +INITIALIZE_PASS(PartialInliner, "partial-inliner", + "Partial Inliner", false, false); ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } @@ -67,7 +68,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { // Clone the function, so that we can hack away on it. 
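In the MergeFunctions hunk above, the quadratic scan over each hash bucket is gone: runOnModule now performs one DenseSet insert per function, where the cheap ProfileFunction hash selects the bucket and the expensive FunctionComparator runs only when two functions land in the same slot. A stand-alone analogue of that lookup structure, with strings standing in for functions (plain C++11, std::unordered_set in place of DenseSet):

    #include <cassert>
    #include <string>
    #include <unordered_set>

    // The hash is deliberately cheap and collision-prone (like ProfileFunction);
    // the equality functor is the expensive deep comparison (like
    // FunctionComparator::Compare), run only within a bucket.
    struct CheapHash {
      size_t operator()(const std::string &S) const { return S.size(); }
    };
    struct DeepEq {
      bool operator()(const std::string &A, const std::string &B) const {
        return A == B;
      }
    };

    int main() {
      std::unordered_set<std::string, CheapHash, DeepEq> Set;
      assert(Set.insert("foo").second);   // first member of the 3-char bucket
      assert(Set.insert("bar").second);   // same bucket: deep compare, then kept
      assert(!Set.insert("foo").second);  // deep-equal to an existing member
      return 0;
    }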
ValueMap<const Value*, Value*> VMap; - Function* duplicateFunction = CloneFunction(F, VMap); + Function* duplicateFunction = CloneFunction(F, VMap, + /*ModuleLevelChanges=*/false); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]); @@ -159,7 +161,7 @@ bool PartialInliner::runOnModule(Module& M) { bool recursive = false; for (Function::use_iterator UI = currFunc->use_begin(), UE = currFunc->use_end(); UI != UE; ++UI) - if (Instruction* I = dyn_cast<Instruction>(UI)) + if (Instruction* I = dyn_cast<Instruction>(*UI)) if (I->getParent()->getParent() == currFunc) { recursive = true; break; diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp index 58e14481b0edc..4a99a411ab338 100644 --- a/lib/Transforms/IPO/PartialSpecialization.cpp +++ b/lib/Transforms/IPO/PartialSpecialization.cpp @@ -50,14 +50,14 @@ namespace { int scanDistribution(Function&, int, std::map<Constant*, int>&); public : static char ID; // Pass identification, replacement for typeid - PartSpec() : ModulePass(&ID) {} + PartSpec() : ModulePass(ID) {} bool runOnModule(Module &M); }; } char PartSpec::ID = 0; -static RegisterPass<PartSpec> -X("partialspecialization", "Partial Specialization"); +INITIALIZE_PASS(PartSpec, "partialspecialization", + "Partial Specialization", false, false); // Specialize F by replacing the arguments (keys) in replacements with the // constants (values). Replace all calls to F with those constants with @@ -74,7 +74,8 @@ SpecializeFunction(Function* F, deleted[arg->getArgNo()] = arg; } - Function* NF = CloneFunction(F, replacements); + Function* NF = CloneFunction(F, replacements, + /*ModuleLevelChanges=*/false); NF->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(NF); @@ -82,10 +83,10 @@ SpecializeFunction(Function* F, ii != ee; ) { Value::use_iterator i = ii; ++ii; - if (isa<CallInst>(i) || isa<InvokeInst>(i)) { - CallSite CS(cast<Instruction>(i)); + User *U = *i; + CallSite CS(U); + if (CS) { if (CS.getCalledFunction() == F) { - SmallVector<Value*, 6> args; // Assemble the non-specialized arguments for the updated callsite. 
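Two idioms recur across these hunks: Value::use_iterator is now dereferenced explicitly to reach the User, and a CallSite is constructed directly from that user and tested with its boolean conversion, replacing CallSite::get and the paired isa<CallInst>/isa<InvokeInst> checks. A sketch of the combined pattern; the helper name countDirectCalls is invented for illustration, and the 2.8-era headers are assumed:

    #include "llvm/Function.h"
    #include "llvm/Support/CallSite.h"
    using namespace llvm;

    // Counts the uses of F that are direct calls or invokes of F itself.
    static unsigned countDirectCalls(Function *F) {
      unsigned N = 0;
      for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
           UI != UE; ++UI) {
        User *U = *UI;     // explicit dereference to get the user
        CallSite CS(U);    // null unless U is a call or invoke instruction
        if (CS && CS.getCalledFunction() == F)
          ++N;
      }
      return N;
    }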
// In the process, make sure that the specialized arguments are @@ -105,13 +106,13 @@ SpecializeFunction(Function* F, } } Value* NCall; - if (CallInst *CI = dyn_cast<CallInst>(i)) { + if (CallInst *CI = dyn_cast<CallInst>(U)) { NCall = CallInst::Create(NF, args.begin(), args.end(), CI->getName(), CI); cast<CallInst>(NCall)->setTailCall(CI->isTailCall()); cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv()); } else { - InvokeInst *II = cast<InvokeInst>(i); + InvokeInst *II = cast<InvokeInst>(U); NCall = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), args.begin(), args.end(), @@ -123,8 +124,7 @@ SpecializeFunction(Function* F, ++numReplaced; } } - next_use: - ; + next_use:; } return NF; } @@ -174,14 +174,14 @@ void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { ui != ue; ++ui) { bool interesting = false; - - if (isa<CmpInst>(ui)) interesting = true; - else if (isa<CallInst>(ui)) + User *U = *ui; + if (isa<CmpInst>(U)) interesting = true; + else if (isa<CallInst>(U)) interesting = ui->getOperand(0) == ii; - else if (isa<InvokeInst>(ui)) + else if (isa<InvokeInst>(U)) interesting = ui->getOperand(0) == ii; - else if (isa<SwitchInst>(ui)) interesting = true; - else if (isa<BranchInst>(ui)) interesting = true; + else if (isa<SwitchInst>(U)) interesting = true; + else if (isa<BranchInst>(U)) interesting = true; if (interesting) { args.push_back(std::distance(F.arg_begin(), ii)); @@ -196,14 +196,16 @@ int PartSpec::scanDistribution(Function& F, int arg, std::map<Constant*, int>& dist) { bool hasIndirect = false; int total = 0; - for(Value::use_iterator ii = F.use_begin(), ee = F.use_end(); - ii != ee; ++ii) - if ((isa<CallInst>(ii) || isa<InvokeInst>(ii)) - && ii->getOperand(0) == &F) { - ++dist[dyn_cast<Constant>(ii->getOperand(arg + 1))]; + for (Value::use_iterator ii = F.use_begin(), ee = F.use_end(); + ii != ee; ++ii) { + User *U = *ii; + CallSite CS(U); + if (CS && CS.getCalledFunction() == &F) { + ++dist[dyn_cast<Constant>(CS.getArgument(arg))]; ++total; } else hasIndirect = true; + } // Preserve the original address taken function even if all other uses // will be specialized. diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index de6099cc1daa0..09ac76f979649 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -37,7 +37,7 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized"); namespace { struct PruneEH : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - PruneEH() : CallGraphSCCPass(&ID) {} + PruneEH() : CallGraphSCCPass(ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. 
bool runOnSCC(CallGraphSCC &SCC); @@ -48,8 +48,8 @@ namespace { } char PruneEH::ID = 0; -static RegisterPass<PruneEH> -X("prune-eh", "Remove unused exception handling info"); +INITIALIZE_PASS(PruneEH, "prune-eh", + "Remove unused exception handling info", false, false); Pass *llvm::createPruneEHPass() { return new PruneEH(); } diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp index 4566a7634af5a..ee10ad0b8ba26 100644 --- a/lib/Transforms/IPO/StripDeadPrototypes.cpp +++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp @@ -29,15 +29,15 @@ namespace { class StripDeadPrototypesPass : public ModulePass { public: static char ID; // Pass identification, replacement for typeid - StripDeadPrototypesPass() : ModulePass(&ID) { } + StripDeadPrototypesPass() : ModulePass(ID) { } virtual bool runOnModule(Module &M); }; } // end anonymous namespace char StripDeadPrototypesPass::ID = 0; -static RegisterPass<StripDeadPrototypesPass> -X("strip-dead-prototypes", "Strip Unused Function Prototypes"); +INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes", + "Strip Unused Function Prototypes", false, false); bool StripDeadPrototypesPass::runOnModule(Module &M) { bool MadeChange = false; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 12e8db8b4a546..20b7b8f2b8509 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -39,7 +39,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripSymbols(bool ODI = false) - : ModulePass(&ID), OnlyDebugInfo(ODI) {} + : ModulePass(ID), OnlyDebugInfo(ODI) {} virtual bool runOnModule(Module &M); @@ -52,7 +52,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() - : ModulePass(&ID) {} + : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -65,7 +65,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() - : ModulePass(&ID) {} + : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -78,7 +78,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit StripDeadDebugInfo() - : ModulePass(&ID) {} + : ModulePass(ID) {} virtual bool runOnModule(Module &M); @@ -89,32 +89,33 @@ namespace { } char StripSymbols::ID = 0; -static RegisterPass<StripSymbols> -X("strip", "Strip all symbols from a module"); +INITIALIZE_PASS(StripSymbols, "strip", + "Strip all symbols from a module", false, false); ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) { return new StripSymbols(OnlyDebugInfo); } char StripNonDebugSymbols::ID = 0; -static RegisterPass<StripNonDebugSymbols> -Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module"); +INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug", + "Strip all symbols, except dbg symbols, from a module", + false, false); ModulePass *llvm::createStripNonDebugSymbolsPass() { return new StripNonDebugSymbols(); } char StripDebugDeclare::ID = 0; -static RegisterPass<StripDebugDeclare> -Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics"); +INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare", + "Strip all llvm.dbg.declare intrinsics", false, false); ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); } char StripDeadDebugInfo::ID = 0; -static RegisterPass<StripDeadDebugInfo> -A("strip-dead-debug-info", "Strip debug info for unused 
symbols"); +INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info", + "Strip debug info for unused symbols", false, false); ModulePass *llvm::createStripDeadDebugInfoPass() { return new StripDeadDebugInfo(); @@ -254,14 +255,15 @@ static bool StripDebugInfo(Module &M) { } } - unsigned MDDbgKind = M.getMDKindID("dbg"); for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { - Changed = true; // FIXME: Only set if there was debug metadata. - BI->setMetadata(MDDbgKind, 0); + if (!BI->getDebugLoc().isUnknown()) { + Changed = true; + BI->setDebugLoc(DebugLoc()); + } } return Changed; @@ -348,8 +350,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(), E = MDs.end(); I != E; ++I) { - if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), - true)) { + GlobalVariable *GV = DIGlobalVariable(*I).getGlobal(); + if (GV && M.getGlobalVariable(GV->getName(), true)) { if (!NMD) NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(*I); diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index a74686f408b67..b82b03f7d9e7d 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -1,4 +1,4 @@ -//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===// +//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===// // // The LLVM Compiler Infrastructure // @@ -50,20 +50,19 @@ namespace { virtual bool runOnSCC(CallGraphSCC &SCC); static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(&ID) {} + SRETPromotion() : CallGraphSCCPass(ID) {} private: CallGraphNode *PromoteReturn(CallGraphNode *CGN); bool isSafeToUpdateAllCallers(Function *F); Function *cloneFunctionBody(Function *F, const StructType *STy); CallGraphNode *updateCallSites(Function *F, Function *NF); - bool nestedStructType(const StructType *STy); }; } char SRETPromotion::ID = 0; -static RegisterPass<SRETPromotion> -X("sretpromotion", "Promote sret arguments to multiple ret values"); +INITIALIZE_PASS(SRETPromotion, "sretpromotion", + "Promote sret arguments to multiple ret values", false, false); Pass *llvm::createStructRetPromotionPass() { return new SRETPromotion(); @@ -156,7 +155,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { FnUseI != FnUseE; ++FnUseI) { // The function is passed in as an argument to (possibly) another function, // we can't change it! - CallSite CS = CallSite::get(*FnUseI); + CallSite CS(*FnUseI); Instruction *Call = CS.getInstruction(); // The function is used by something else than a call or invoke instruction, // we can't change it! 
@@ -187,7 +186,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { return false; for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); GEPI != GEPE; ++GEPI) - if (!isa<LoadInst>(GEPI)) + if (!isa<LoadInst>(*GEPI)) return false; } // Any other FirstArg users make this function unsuitable for sret @@ -271,7 +270,7 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); while (!F->use_empty()) { - CallSite CS = CallSite::get(*F->use_begin()); + CallSite CS(*F->use_begin()); Instruction *Call = CS.getInstruction(); const AttrListPtr &PAL = F->getAttributes(); @@ -351,14 +350,3 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { return NF_CGN; } -/// nestedStructType - Return true if STy includes any -/// other aggregate types -bool SRETPromotion::nestedStructType(const StructType *STy) { - unsigned Num = STy->getNumElements(); - for (unsigned i = 0; i < Num; i++) { - const Type *Ty = STy->getElementType(i); - if (!Ty->isSingleValueType() && !Ty->isVoidTy()) - return true; - } - return false; -} diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 24e052881a9d4..6f9609cf997ba 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -81,7 +81,7 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} + InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {} public: virtual bool runOnFunction(Function &F); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 5876f408343b6..19a05bfe9bba3 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -474,19 +474,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) --> - // (icmp eq (A & (C1|C2)), (C1|C2)) + // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Instruction *I1 = dyn_cast<Instruction>(Val); - Instruction *I2 = dyn_cast<Instruction>(Val2); - if (I1 && I1->getOpcode() == Instruction::And && - I2 && I2->getOpcode() == Instruction::And && - I1->getOperand(0) == I1->getOperand(0)) { - ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1)); - ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1)); - if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() && - CI1->getValue().operator&(CI2->getValue()) == 0) { + Value *Op1 = 0, *Op2 = 0; + ConstantInt *CI1 = 0, *CI2 = 0; + if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && + match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { + if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && + CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); - Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr); + Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr); } } @@ -1170,11 +1167,28 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); if (LHSCst == 0 || RHSCst == 0) return 0; - // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) - if (LHSCst == RHSCst && 
LHSCC == RHSCC && - LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) --> + // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *Op1 = 0, *Op2 = 0; + ConstantInt *CI1 = 0, *CI2 = 0; + if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) && + match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) { + if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() && + CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) { + Constant *ConstOr = ConstantExpr::getOr(CI1, CI2); + Value *NewAnd = Builder->CreateAnd(Op1, ConstOr); + return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr); + } + } + } } // From here on, we only handle: diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 85251a83d4eac..0ebe3b45589e5 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -96,14 +96,23 @@ static unsigned EnforceKnownAlignment(Value *V, /// increase the alignment of the ultimate object, making this check succeed. unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, unsigned PrefAlign) { - unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) : - sizeof(PrefAlign) * CHAR_BIT; + assert(V->getType()->isPointerTy() && + "GetOrEnforceKnownAlignment expects a pointer!"); + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(V, Mask, KnownZero, KnownOne); unsigned TrailZ = KnownZero.countTrailingOnes(); + + // Avoid trouble with ridiculously large TrailZ values, such as + // those computed from a null pointer. + TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); + unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + // LLVM doesn't support alignments larger than this currently. + Align = std::min(Align, +Value::MaximumAlignment); + if (PrefAlign > Align) Align = EnforceKnownAlignment(V, Align, PrefAlign); @@ -529,7 +538,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X + 0 -> {X, false} if (RHS->isZero()) { Constant *V[] = { - UndefValue::get(II->getCalledValue()->getType()), + UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); @@ -630,8 +639,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements(); APInt DemandedElts(VWidth, 1); APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, - UndefElts)) { + if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), + DemandedElts, UndefElts)) { II->setArgOperand(0, V); return II; } @@ -655,8 +664,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors.
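The new branch of FoldOrOfICmps above can be checked exhaustively for any concrete pair of power-of-two masks. A small self-contained verification, with C1=4 and C2=16 chosen arbitrarily for the test:

    #include <cassert>
    #include <cstdint>

    // ((A & C1) == 0) | ((A & C2) == 0)  <=>  (A & (C1|C2)) != (C1|C2)
    // for non-zero power-of-two C1, C2: the left side says "not both bits
    // set", which is exactly what the right side tests.
    int main() {
      const uint32_t C1 = 4, C2 = 16;
      for (uint32_t A = 0; A <= 0xFFFF; ++A) {
        bool Orig   = ((A & C1) == 0) || ((A & C2) == 0);
        bool Folded = (A & (C1 | C2)) != (C1 | C2);
        assert(Orig == Folded);
      }
      return 0;
    }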
- Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType()); + Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), + Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -772,13 +783,15 @@ protected: NewInstruction = IC->ReplaceInstUsesWith(*CI, With); } bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) { + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { if (SizeCI->isAllOnesValue()) return true; if (isString) return SizeCI->getZExtValue() >= - GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)); - if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset))) + GetStringLength(CI->getArgOperand(SizeArgOp)); + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); } return false; @@ -1140,7 +1153,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { IntrinsicInst *Tramp = cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0)); - Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); + Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts()); const PointerType *NestFPTy = cast<PointerType>(NestF->getType()); const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 505a0bf8f4e7f..79a9b09c64d07 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -396,6 +396,11 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) { case Instruction::Trunc: // trunc(trunc(x)) -> trunc(x) return true; + case Instruction::ZExt: + case Instruction::SExt: + // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest + // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest + return true; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(I); return CanEvaluateTruncated(SI->getTrueValue(), Ty) && @@ -454,6 +459,29 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } + + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. + Value *A = 0; ConstantInt *Cst = 0; + if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) && + Src->hasOneUse()) { + // We have three types to worry about here, the type of A, the source of + // the truncate (MidSize), and the destination of the truncate. We know that + // ASize < MidSize and MidSize > ResultSize, but don't know the relation + // between ASize and ResultSize. + unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + + // If the shift amount is larger than the size of A, then the result is + // known to be zero because all the input bits got shifted out. 
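The trunc(lshr(zext A, Cst)) rewrite above can likewise be sanity-checked on a concrete instance: i16 zero-extended to i32, shifted right by 8, then truncated to i8. Since the shift amount is smaller than the width of A, the shift can be done in A's own type:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t V = 0; V <= 0xFFFF; ++V) {
        uint16_t A = (uint16_t)V;
        uint8_t Before = (uint8_t)(((uint32_t)A) >> 8); // trunc(lshr(zext A), 8)
        uint8_t After  = (uint8_t)(A >> 8);             // shift done in A's type
        assert(Before == After);
      }
      return 0;
    }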
+ if (Cst->getZExtValue() >= ASize) + return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType())); + + // Since we're doing an lshr and a zero extend, and know that the shift + // amount is smaller than ASize, it is always safe to do the shift in A's + // type, then zero extend or truncate to the result. + Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), false); + } return 0; } @@ -538,8 +566,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); - else - return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); + return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); } } } @@ -1097,6 +1124,38 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { break; } } + + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) + // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it. + CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); + if (Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "sqrt" && + Call->getNumArgOperands() == 1) { + CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); + if (Arg && Arg->getOpcode() == Instruction::FPExt && + CI.getType()->isFloatTy() && + Call->getType()->isDoubleTy() && + Arg->getType()->isDoubleTy() && + Arg->getOperand(0)->getType()->isFloatTy()) { + Function *Callee = Call->getCalledFunction(); + Module *M = CI.getParent()->getParent()->getParent(); + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Callee->getAttributes(), + Builder->getFloatTy(), + Builder->getFloatTy(), + NULL); + CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), + "sqrtfcall"); + ret->setAttributes(Callee->getAttributes()); + + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. + Call->replaceAllUsesWith(UndefValue::get(Call->getType())); + EraseInstFromFunction(*Call); + return ret; + } + } + return 0; } @@ -1308,6 +1367,199 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, return new ShuffleVectorInst(InVal, V2, Mask); } +static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) { + return Value % Ty->getPrimitiveSizeInBits() == 0; +} + +static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) { + return Value / Ty->getPrimitiveSizeInBits(); +} + +/// CollectInsertionElements - V is a value which is inserted into a vector of +/// VecEltTy. Look through the value to see if we can decompose it into +/// insertions into the vector. See the example in the comment for +/// OptimizeIntegerToVectorInsertions for the pattern this handles. +/// The type of V is always a non-zero multiple of VecEltTy's size. +/// +/// This returns false if the pattern can't be matched or true if it can, +/// filling in Elements with the elements found here. +static bool CollectInsertionElements(Value *V, unsigned ElementIndex, + SmallVectorImpl<Value*> &Elements, + const Type *VecEltTy) { + // Undef values never contribute useful bits to the result. + if (isa<UndefValue>(V)) return true; + + // If we got down to a value of the right type, we win, try inserting into the + // right element. + if (V->getType() == VecEltTy) { + // Inserting null doesn't actually insert any elements. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return true; + + // Fail if multiple elements are inserted into this slot. 
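The constant-slicing step of CollectInsertionElements above decomposes a wide constant into element-sized pieces with lshr and trunc. The same arithmetic on a host integer, for an i64 covering two i32 elements:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t C = 0x1122334455667788ULL;      // spans two 32-bit elements
      uint32_t Piece0 = (uint32_t)(C >> 0);    // element 0: lshr 0, trunc
      uint32_t Piece1 = (uint32_t)(C >> 32);   // element 1: lshr 32, trunc
      assert(Piece0 == 0x55667788u);
      assert(Piece1 == 0x11223344u);
      return 0;
    }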
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + return false; + + Elements[ElementIndex] = V; + return true; + } + + if (Constant *C = dyn_cast<Constant>(V)) { + // Figure out the # elements this provides, and bitcast it or slice it up + // as required. + unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(), + VecEltTy); + // If the constant is the size of a vector element, we just need to bitcast + // it to the right type so it gets properly inserted. + if (NumElts == 1) + return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + ElementIndex, Elements, VecEltTy); + + // Okay, this is a constant that covers multiple elements. Slice it up into + // pieces and insert each element-sized piece into the vector. + if (!isa<IntegerType>(C->getType())) + C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(), + C->getType()->getPrimitiveSizeInBits())); + unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); + const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); + + for (unsigned i = 0; i != NumElts; ++i) { + Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), + i*ElementSize)); + Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); + if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + return false; + } + return true; + } + + if (!V->hasOneUse()) return false; + + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return false; + switch (I->getOpcode()) { + default: return false; // Unhandled case. + case Instruction::BitCast: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::ZExt: + if (!isMultipleOfTypeSize( + I->getOperand(0)->getType()->getPrimitiveSizeInBits(), + VecEltTy)) + return false; + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy); + case Instruction::Or: + return CollectInsertionElements(I->getOperand(0), ElementIndex, + Elements, VecEltTy) && + CollectInsertionElements(I->getOperand(1), ElementIndex, + Elements, VecEltTy); + case Instruction::Shl: { + // Must be shifting by a constant that is a multiple of the element size. + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + if (CI == 0) return false; + if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; + unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); + + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, + Elements, VecEltTy); + } + + } +} + + +/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we +/// may be doing shifts and ors to assemble the elements of the vector manually. +/// Try to rip the code out and replace it with insertelements. This is to +/// optimize code like this: +/// +/// %tmp37 = bitcast float %inc to i32 +/// %tmp38 = zext i32 %tmp37 to i64 +/// %tmp31 = bitcast float %inc5 to i32 +/// %tmp32 = zext i32 %tmp31 to i64 +/// %tmp33 = shl i64 %tmp32, 32 +/// %ins35 = or i64 %tmp33, %tmp38 +/// %tmp43 = bitcast i64 %ins35 to <2 x float> +/// +/// Into two insertelements that do "buildvector{%inc, %inc5}". 
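The IR pattern quoted in the comment above corresponds to ordinary scalar bit-packing. A host-side demonstration that the zext/shl/or chain really does place %inc and %inc5 in lanes 0 and 1 of the <2 x float>, assuming a little-endian host to match little-endian lane layout:

    #include <cassert>
    #include <cstring>
    #include <cstdint>

    int main() {
      float inc = 1.0f, inc5 = 2.0f;
      uint32_t Lo, Hi;
      std::memcpy(&Lo, &inc, 4);                     // bitcast float -> i32
      std::memcpy(&Hi, &inc5, 4);
      uint64_t Ins35 = ((uint64_t)Hi << 32) | Lo;    // zext, shl 32, or
      float Lanes[2];
      std::memcpy(Lanes, &Ins35, 8);                 // bitcast i64 -> <2 x float>
      assert(Lanes[0] == inc && Lanes[1] == inc5);   // buildvector{%inc, %inc5}
      return 0;
    }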
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+                                                InstCombiner &IC) {
+  const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+  Value *IntInput = CI.getOperand(0);
+
+  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+  if (!CollectInsertionElements(IntInput, 0, Elements,
+                                DestVecTy->getElementType()))
+    return 0;
+
+  // If we succeeded, we know that all of the elements are specified by
+  // Elements or are zero if Elements has a null entry.  Recast this as a set
+  // of insertions.
+  Value *Result = Constant::getNullValue(CI.getType());
+  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+    if (Elements[i] == 0) continue;  // Unset element.
+
+    Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+                                             IC.Builder->getInt32(i));
+  }
+
+  return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast.  The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,
+                                              InstCombiner &IC) {
+  Value *Src = CI.getOperand(0);
+  const Type *DestTy = CI.getType();
+
+  // If this is a bitcast from int to float, check to see if the int is an
+  // extraction from a vector.
+  Value *VecInput = 0;
+  // bitcast(trunc(bitcast(somevector)))
+  if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
+
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+    }
+  }
+
+  // bitcast(trunc(lshr(bitcast(somevector), cst)))
+  ConstantInt *ShAmt = 0;
+  if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+                                m_ConstantInt(ShAmt)))) &&
+      isa<VectorType>(VecInput->getType())) {
+    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+        ShAmt->getZExtValue() % DestWidth == 0) {
+      // If the element type of the vector doesn't match the result type,
+      // bitcast it to be a vector type we can extract from.
+      if (VecTy->getElementType() != DestTy) {
+        VecTy = VectorType::get(DestTy,
+                                VecTy->getPrimitiveSizeInBits() / DestWidth);
+        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+      }
+
+      unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+    }
+  }
+  return 0;
+}
 
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
@@ -1359,6 +1611,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
         ((Instruction*)NULL));
     }
   }
+
+  // Try to optimize int -> float bitcasts.
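+  // For example, assuming a <2 x float> source %v:
+  //   %b = bitcast <2 x float> %v to i64
+  //   %s = lshr i64 %b, 32
+  //   %t = trunc i64 %s to i32
+  //   %f = bitcast i32 %t to float
+  // becomes "extractelement <2 x float> %v, i32 1".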
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + return I; if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { @@ -1368,16 +1625,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - // If this is a cast from an integer to vector, check to see if the input - // is a trunc or zext of a bitcast from vector. If so, we can replace all - // the casts with a shuffle and (potentially) a bitcast. - if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ - CastInst *SrcCast = cast<CastInst>(Src); - if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) - if (isa<VectorType>(BCIn->getOperand(0)->getType())) - if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + if (isa<IntegerType>(SrcTy)) { + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) { + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), cast<VectorType>(DestTy), *this)) - return I; + return I; + } + + // If the input is an 'or' instruction, we may be doing shifts and ors to + // assemble the elements of the vector manually. Try to rip the code out + // and replace it with insertelements. + if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this)) + return ReplaceInstUsesWith(CI, V); } } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6c00586412ac1..d7e2b72b7fac5 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1374,7 +1374,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, case Instruction::Or: // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! - if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { Constant *NotCI = ConstantExpr::getNot(RHS); if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 8933a0b137ab4..b68fbc2db5c9a 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -146,10 +146,14 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (TD) { unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); - if (KnownAlign > - (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : - LI.getAlignment())) + unsigned LoadAlign = LI.getAlignment(); + unsigned EffectiveLoadAlign = LoadAlign != 0 ? 
LoadAlign : + TD->getABITypeAlignment(LI.getType()); + + if (KnownAlign > EffectiveLoadAlign) LI.setAlignment(KnownAlign); + else if (LoadAlign == 0) + LI.setAlignment(EffectiveLoadAlign); } // load (cast X) --> cast (load X) iff safe. @@ -369,7 +373,7 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U)) return DI; if (isa<BitCastInst>(U) && U->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin())) + if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin())) return DI; } } @@ -411,10 +415,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (TD) { unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); - if (KnownAlign > - (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : - SI.getAlignment())) + unsigned StoreAlign = SI.getAlignment(); + unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : + TD->getABITypeAlignment(Val->getType()); + + if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); + else if (StoreAlign == 0) + SI.setAlignment(EffectiveStoreAlign); } // Do really simple DSE, to catch cases where there are several consecutive diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index f9ffdb10f2660..c44fe9db6e3a7 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -699,34 +699,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SI.setOperand(2, TrueVal); return &SI; } - - // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T - // Note: This is a canonicalization rather than an optimization, and is used - // to expose opportunities to other instcombine transforms. - Instruction* CondInst = dyn_cast<Instruction>(CondVal); - if (CondInst && CondInst->hasOneUse() && - CondInst->getOpcode() == Instruction::Or) { - ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0)); - ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1)); - if (LHSCmp && LHSCmp->hasOneUse() && - LHSCmp->getPredicate() == ICmpInst::ICMP_EQ && - RHSCmp && RHSCmp->hasOneUse() && - RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) { - ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1)); - ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1)); - if (C1 && C1->isZero() && C2 && C2->isZero()) { - LHSCmp->setPredicate(ICmpInst::ICMP_NE); - RHSCmp->setPredicate(ICmpInst::ICMP_NE); - Value *And = - InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp, - "and."+CondVal->getName()), SI); - SI.setOperand(0, And); - SI.setOperand(1, FalseVal); - SI.setOperand(2, TrueVal); - return &SI; - } - } - } return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index e5ce8a612f3f6..27716b886a226 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -56,10 +56,270 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return 0; } +/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// logically to the left or right by some number of bits. This should return +/// true if the expression can be computed for the same cost as the current +/// expression tree. 
This is used to eliminate extraneous shifting from things
+/// like:
+///      %C = shl i128 %A, 64
+///      %D = shl i128 %B, 96
+///      %E = or i128 %C, %D
+///      %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64 bits.
+/// If this succeeds, the GetShiftedValue function will be called to produce
+/// the value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+                               InstCombiner &IC) {
+  // We can always evaluate constants shifted.
+  if (isa<Constant>(V))
+    return true;
+
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return false;
+
+  // If this is the opposite shift, we can directly reuse the input of the
+  // shift if the needed bits are already zero in the input.  This allows us
+  // to reuse the value, which means that we don't care if the shift has
+  // multiple uses.
+  // TODO: Handle opposite shift by exact value.
+  ConstantInt *CI;
+  if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+      (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+    if (CI->getZExtValue() == NumBits) {
+      // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+      // If this is a truncate of a logical shr, we can truncate it to a smaller
+      // lshr iff we know that the bits we would otherwise be shifting in are
+      // already zeros.
+      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (MaskedValueIsZero(I->getOperand(0),
+            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+          CI->getLimitedValue(BitWidth) < BitWidth) {
+        return CanEvaluateTruncated(I->getOperand(0), Ty);
+      }
+#endif
+    }
+  }
+
+  // We can't mutate something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
+  switch (I->getOpcode()) {
+  default: return false;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // Bitwise operators can all be evaluated shifted.
+    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+  case Instruction::Shl: {
+    // We can often fold the shift into shifts-by-a-constant.
+    CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (CI == 0) return false;
+
+    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+    if (isLeftShift) return true;
+
+    // We can always turn shl(c)+shr(c) -> and(c2).
+    if (CI->getValue() == NumBits) return true;
+
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+    // profitable unless we know the and'd out bits are already zero.
+    if (CI->getZExtValue() > NumBits) {
+      unsigned HighBits = CI->getZExtValue() - NumBits;
+      if (MaskedValueIsZero(I->getOperand(0),
+                            APInt::getHighBitsSet(TypeWidth, HighBits)))
+        return true;
+    }
+
+    return false;
+  }
+  case Instruction::LShr: {
+    // We can often fold the shift into shifts-by-a-constant.
+    CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    if (CI == 0) return false;
+
+    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+    if (!isLeftShift) return true;
+
+    // We can always turn lshr(c)+shl(c) -> and(c2).
+    if (CI->getValue() == NumBits) return true;
+
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+    // profitable unless we know the and'd out bits are already zero.
+    if (CI->getZExtValue() > NumBits) {
+      unsigned LowBits = CI->getZExtValue() - NumBits;
+      if (MaskedValueIsZero(I->getOperand(0),
+                            APInt::getLowBitsSet(TypeWidth, LowBits)))
+        return true;
+    }
+
+    return false;
+  }
+  case Instruction::Select: {
+    SelectInst *SI = cast<SelectInst>(I);
+    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+  }
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+        return false;
+    return true;
+  }
+  }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this function inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+                              InstCombiner &IC) {
+  // We can always evaluate constants shifted.
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    if (isLeftShift)
+      V = IC.Builder->CreateShl(C, NumBits);
+    else
+      V = IC.Builder->CreateLShr(C, NumBits);
+    // If we got a constantexpr back, try to simplify it with TD info.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+    return V;
+  }
+
+  Instruction *I = cast<Instruction>(V);
+  IC.Worklist.Add(I);
+
+  switch (I->getOpcode()) {
+  default: assert(0 && "Inconsistency with CanEvaluateShifted");
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    // Bitwise operators can all be evaluated shifted.
+    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+    return I;
+
+  case Instruction::Shl: {
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+    // We only accept shifts-by-a-constant in CanEvaluateShifted.
+    ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+    if (isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+      unsigned NewShAmt = NumBits+CI->getZExtValue();
+      if (NewShAmt >= TypeWidth)
+        return Constant::getNullValue(I->getType());
+
+      I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+      return I;
+    }
+
+    // We turn shl(c)+lshr(c) -> and(c2), in case the input doesn't already
+    // have zeros in the high bits.
+    if (CI->getValue() == NumBits) {
+      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+      V = IC.Builder->CreateAnd(I->getOperand(0),
+                                ConstantInt::get(I->getContext(), Mask));
+      if (Instruction *VI = dyn_cast<Instruction>(V)) {
+        VI->moveBefore(I);
+        VI->takeName(I);
+      }
+      return V;
+    }
+
+    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+    // the and won't be needed.
+    assert(CI->getZExtValue() > NumBits);
+    I->setOperand(1, ConstantInt::get(I->getType(),
+                                      CI->getZExtValue() - NumBits));
+    return I;
+  }
+  case Instruction::LShr: {
+    unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+    // We only accept shifts-by-a-constant in CanEvaluateShifted.
+    ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
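+    // (For example, an lshr by 3 whose result must itself be shifted right
+    // by 5 simply becomes an lshr by 8; the oversized case is handled below
+    // by returning zero.)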
+    if (!isLeftShift) {
+      // If this is an oversized composite shift, then unsigned shifts get 0.
+      unsigned NewShAmt = NumBits+CI->getZExtValue();
+      if (NewShAmt >= TypeWidth)
+        return Constant::getNullValue(I->getType());
+
+      I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+      return I;
+    }
+
+    // We turn lshr(c)+shl(c) -> and(c2), in case the input doesn't already
+    // have zeros in the low bits.
+    if (CI->getValue() == NumBits) {
+      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+      V = IC.Builder->CreateAnd(I->getOperand(0),
+                                ConstantInt::get(I->getContext(), Mask));
+      if (Instruction *VI = dyn_cast<Instruction>(V)) {
+        VI->moveBefore(I);
+        VI->takeName(I);
+      }
+      return V;
+    }
+
+    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know
+    // that the and won't be needed.
+    assert(CI->getZExtValue() > NumBits);
+    I->setOperand(1, ConstantInt::get(I->getType(),
+                                      CI->getZExtValue() - NumBits));
+    return I;
+  }
+
+  case Instruction::Select:
+    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+    return I;
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.  Note that we never
+    // get into trouble with cyclic PHIs here because we only consider
+    // instructions with a single use.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+                                              NumBits, isLeftShift, IC));
+    return PN;
+  }
+  }
+}
+
 Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                                BinaryOperator &I) {
   bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
+
+  // See if we can propagate this shift into the input; this covers the trivial
+  // case of lshr(shl(x,c1),c2) as well as other more complex cases.
+  if (I.getOpcode() != Instruction::AShr &&
+      CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+    DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+              " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");
+
+    return ReplaceInstUsesWith(I,
+                 GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+  }
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
@@ -288,39 +548,17 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                             ConstantInt::get(Ty, AmtSum));
     }
-
-    if (ShiftOp->getOpcode() == Instruction::LShr &&
-        I.getOpcode() == Instruction::AShr) {
-      if (AmtSum >= TypeBits)
-        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
-      // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
-      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
-    }
-
-    if (ShiftOp->getOpcode() == Instruction::AShr &&
-        I.getOpcode() == Instruction::LShr) {
-      // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask)  since C1 != 0.
-      if (AmtSum >= TypeBits)
-        AmtSum = TypeBits-1;
-
-      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
-      APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::CreateAnd(Shift,
-                                       ConstantInt::get(I.getContext(), Mask));
-    }
-
-    // Okay, if we get here, one shift must be left, and the other shift must be
-    // right.  See if the amounts are equal.
   if (ShiftAmt1 == ShiftAmt2) {
     // If we have ((X >>? C) << C), turn this into X & (-1 << C).
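     // (For example, ((X >>u 4) << 4) on i32 becomes X & 0xFFFFFFF0; the same
     // holds when the inner shift is an ashr.)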
- if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), Mask)); @@ -329,7 +567,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); @@ -340,7 +579,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::Shl); Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); @@ -355,9 +595,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) - if (I.getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::LShr || - ShiftOp->getOpcode() == Instruction::AShr); + if (I.getOpcode() == Instruction::Shl && + ShiftOp->getOpcode() != Instruction::Shl) { Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, ConstantInt::get(Ty, ShiftDiff)); @@ -367,8 +606,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) - if (I.getOpcode() == Instruction::LShr) { - assert(ShiftOp->getOpcode() == Instruction::Shl); + if (I.getOpcode() == Instruction::LShr && + ShiftOp->getOpcode() == Instruction::Shl) { Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index af2958fe3d910..e46c67994e2b0 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -60,8 +60,8 @@ STATISTIC(NumSunkInst , "Number of instructions sunk"); char InstCombiner::ID = 0; -static RegisterPass<InstCombiner> -X("instcombine", "Combine redundant instructions"); +INITIALIZE_PASS(InstCombiner, "instcombine", + "Combine redundant instructions", false, false); void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreservedID(LCSSAID); diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 9ae3786707153..a77d70cd1c1bc 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -34,7 +34,7 @@ namespace { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - 
EdgeProfiler() : ModulePass(&ID) {} + EdgeProfiler() : ModulePass(ID) {} virtual const char *getPassName() const { return "Edge Profiler"; @@ -43,8 +43,8 @@ namespace { } char EdgeProfiler::ID = 0; -static RegisterPass<EdgeProfiler> -X("insert-edge-profiling", "Insert instrumentation for edge profiling"); +INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling", + "Insert instrumentation for edge profiling", false, false); ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index 41e3a39f2685b..8eec9872812dc 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -36,7 +36,7 @@ namespace { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid - OptimalEdgeProfiler() : ModulePass(&ID) {} + OptimalEdgeProfiler() : ModulePass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(ProfileEstimatorPassID); @@ -50,9 +50,9 @@ namespace { } char OptimalEdgeProfiler::ID = 0; -static RegisterPass<OptimalEdgeProfiler> -X("insert-optimal-edge-profiling", - "Insert optimal instrumentation for edge profiling"); +INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling", + "Insert optimal instrumentation for edge profiling", + false, false); ModulePass *llvm::createOptimalEdgeProfilerPass() { return new OptimalEdgeProfiler(); diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp deleted file mode 100644 index dcf14a6860da5..0000000000000 --- a/lib/Transforms/Scalar/ABCD.cpp +++ /dev/null @@ -1,1112 +0,0 @@ -//===------- ABCD.cpp - Removes redundant conditional branches ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass removes redundant branch instructions. This algorithm was -// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper -// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original -// Algorithm was created to remove array bound checks for strongly typed -// languages. This implementation expands the idea and removes any conditional -// branches that can be proved redundant, not only those used in array bound -// checks. With the SSI representation, each variable has a -// constraint. By analyzing these constraints we can prove that a branch is -// redundant. When a branch is proved redundant it means that -// one direction will always be taken; thus, we can change this branch into an -// unconditional jump. -// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination -// after ABCD to clean up the code. -// This implementation was created based on the implementation of the ABCD -// algorithm implemented for the compiler Jitrino. 
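-// For example, in
-//   if (i < n) { ...  if (i < n) { ... } }
-// the inner test is implied by the outer one, so ABCD can prove the inner
-// branch redundant and turn it into an unconditional jump.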
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "abcd" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/SSI.h" - -using namespace llvm; - -STATISTIC(NumBranchTested, "Number of conditional branches analyzed"); -STATISTIC(NumBranchRemoved, "Number of conditional branches removed"); - -namespace { - -class ABCD : public FunctionPass { - public: - static char ID; // Pass identification, replacement for typeid. - ABCD() : FunctionPass(&ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<SSI>(); - } - - bool runOnFunction(Function &F); - - private: - /// Keep track of whether we've modified the program yet. - bool modified; - - enum ProveResult { - False = 0, - Reduced = 1, - True = 2 - }; - - typedef ProveResult (*meet_function)(ProveResult, ProveResult); - static ProveResult max(ProveResult res1, ProveResult res2) { - return (ProveResult) std::max(res1, res2); - } - static ProveResult min(ProveResult res1, ProveResult res2) { - return (ProveResult) std::min(res1, res2); - } - - class Bound { - public: - Bound(APInt v, bool upper) : value(v), upper_bound(upper) {} - Bound(const Bound &b, int cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - Bound(const Bound &b, const APInt &cnst) - : value(b.value - cnst), upper_bound(b.upper_bound) {} - - /// Test if Bound is an upper bound - bool isUpperBound() const { return upper_bound; } - - /// Get the bitwidth of this bound - int32_t getBitWidth() const { return value.getBitWidth(); } - - /// Creates a Bound incrementing the one received - static Bound createIncrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value+1 : b.value-1, - b.upper_bound); - } - - /// Creates a Bound decrementing the one received - static Bound createDecrement(const Bound &b) { - return Bound(b.isUpperBound() ? b.value-1 : b.value+1, - b.upper_bound); - } - - /// Test if two bounds are equal - static bool eq(const Bound *a, const Bound *b) { - if (!a || !b) return false; - - assert(a->isUpperBound() == b->isUpperBound()); - return a->value == b->value; - } - - /// Test if val is less than or equal to Bound b - static bool leq(APInt val, const Bound &b) { - return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value); - } - - /// Test if Bound a is less then or equal to Bound - static bool leq(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? a.value.sle(b.value) : - a.value.sge(b.value); - } - - /// Test if Bound a is less then Bound b - static bool lt(const Bound &a, const Bound &b) { - assert(a.isUpperBound() == b.isUpperBound()); - return a.isUpperBound() ? a.value.slt(b.value) : - a.value.sgt(b.value); - } - - /// Test if Bound b is greater then or equal val - static bool geq(const Bound &b, APInt val) { - return leq(val, b); - } - - /// Test if Bound a is greater then or equal Bound b - static bool geq(const Bound &a, const Bound &b) { - return leq(b, a); - } - - private: - APInt value; - bool upper_bound; - }; - - /// This class is used to store results some parts of the graph, - /// so information does not need to be recalculated. 
The maximum false, - /// minimum true and minimum reduced results are stored - class MemoizedResultChart { - public: - MemoizedResultChart() {} - MemoizedResultChart(const MemoizedResultChart &other) { - if (other.max_false) - max_false.reset(new Bound(*other.max_false)); - if (other.min_true) - min_true.reset(new Bound(*other.min_true)); - if (other.min_reduced) - min_reduced.reset(new Bound(*other.min_reduced)); - } - - /// Returns the max false - const Bound *getFalse() const { return max_false.get(); } - - /// Returns the min true - const Bound *getTrue() const { return min_true.get(); } - - /// Returns the min reduced - const Bound *getReduced() const { return min_reduced.get(); } - - /// Return the stored result for this bound - ProveResult getResult(const Bound &bound) const; - - /// Stores a false found - void addFalse(const Bound &bound); - - /// Stores a true found - void addTrue(const Bound &bound); - - /// Stores a Reduced found - void addReduced(const Bound &bound); - - /// Clears redundant reduced - /// If a min_true is smaller than a min_reduced then the min_reduced - /// is unnecessary and then removed. It also works for min_reduced - /// begin smaller than max_false. - void clearRedundantReduced(); - - void clear() { - max_false.reset(); - min_true.reset(); - min_reduced.reset(); - } - - private: - OwningPtr<Bound> max_false, min_true, min_reduced; - }; - - /// This class stores the result found for a node of the graph, - /// so these results do not need to be recalculated, only searched for. - class MemoizedResult { - public: - /// Test if there is true result stored from b to a - /// that is less then the bound - bool hasTrue(Value *b, const Bound &bound) const { - const Bound *trueBound = map.lookup(b).getTrue(); - return trueBound && Bound::leq(*trueBound, bound); - } - - /// Test if there is false result stored from b to a - /// that is less then the bound - bool hasFalse(Value *b, const Bound &bound) const { - const Bound *falseBound = map.lookup(b).getFalse(); - return falseBound && Bound::leq(*falseBound, bound); - } - - /// Test if there is reduced result stored from b to a - /// that is less then the bound - bool hasReduced(Value *b, const Bound &bound) const { - const Bound *reducedBound = map.lookup(b).getReduced(); - return reducedBound && Bound::leq(*reducedBound, bound); - } - - /// Returns the stored bound for b - ProveResult getBoundResult(Value *b, const Bound &bound) { - return map[b].getResult(bound); - } - - /// Clears the map - void clear() { - DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin(); - DenseMapIterator<Value*, MemoizedResultChart> end = map.end(); - for (; begin != end; ++begin) { - begin->second.clear(); - } - map.clear(); - } - - /// Stores the bound found - void updateBound(Value *b, const Bound &bound, const ProveResult res); - - private: - // Maps a nod in the graph with its results found. - DenseMap<Value*, MemoizedResultChart> map; - }; - - /// This class represents an edge in the inequality graph used by the - /// ABCD algorithm. An edge connects node v to node u with a value c if - /// we could infer a constraint v <= u + c in the source program. 
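-  /// (For instance, inferring "x <= y + 3" produces an edge between y and
-  /// x carrying the weight 3.)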
- class Edge { - public: - Edge(Value *V, APInt val, bool upper) - : vertex(V), value(val), upper_bound(upper) {} - - Value *getVertex() const { return vertex; } - const APInt &getValue() const { return value; } - bool isUpperBound() const { return upper_bound; } - - private: - Value *vertex; - APInt value; - bool upper_bound; - }; - - /// Weighted and Directed graph to represent constraints. - /// There is one type of constraint, a <= b + X, which will generate an - /// edge from b to a with weight X. - class InequalityGraph { - public: - - /// Adds an edge from V_from to V_to with weight value - void addEdge(Value *V_from, Value *V_to, APInt value, bool upper); - - /// Test if there is a node V - bool hasNode(Value *V) const { return graph.count(V); } - - /// Test if there is any edge from V in the upper direction - bool hasEdge(Value *V, bool upper) const; - - /// Returns all edges pointed by vertex V - SmallVector<Edge, 16> getEdges(Value *V) const { - return graph.lookup(V); - } - - /// Prints the graph in dot format. - /// Blue edges represent upper bound and Red lower bound. - void printGraph(raw_ostream &OS, Function &F) const { - printHeader(OS, F); - printBody(OS); - printFooter(OS); - } - - /// Clear the graph - void clear() { - graph.clear(); - } - - private: - DenseMap<Value *, SmallVector<Edge, 16> > graph; - - /// Prints the header of the dot file - void printHeader(raw_ostream &OS, Function &F) const; - - /// Prints the footer of the dot file - void printFooter(raw_ostream &OS) const { - OS << "}\n"; - } - - /// Prints the body of the dot file - void printBody(raw_ostream &OS) const; - - /// Prints vertex source to the dot file - void printVertex(raw_ostream &OS, Value *source) const; - - /// Prints the edge to the dot file - void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const; - - void printName(raw_ostream &OS, Value *info) const; - }; - - /// Iterates through all BasicBlocks, if the Terminator Instruction - /// uses an Comparator Instruction, all operands of this comparator - /// are sent to be transformed to SSI. Only Instruction operands are - /// transformed. - void createSSI(Function &F); - - /// Creates the graphs for this function. - /// It will look for all comparators used in branches, and create them. - /// These comparators will create constraints for any instruction as an - /// operand. - void executeABCD(Function &F); - - /// Seeks redundancies in the comparator instruction CI. - /// If the ABCD algorithm can prove that the comparator CI always - /// takes one way, then the Terminator Instruction TI is substituted from - /// a conditional branch to a unconditional one. - /// This code basically receives a comparator, and verifies which kind of - /// instruction it is. Depending on the kind of instruction, we use different - /// strategies to prove its redundancy. - void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI); - - /// Substitutes Terminator Instruction TI, that is a conditional branch, - /// with one unconditional branch. Succ_edge determines if the new - /// unconditional edge will be the first or second edge of the former TI - /// instruction. - void removeRedundancy(TerminatorInst *TI, bool Succ_edge); - - /// When an conditional branch is removed, the BasicBlock that is no longer - /// reachable will have problems in phi functions. This method fixes these - /// phis removing the former BasicBlock from the list of incoming BasicBlocks - /// of all phis. 
In case the phi remains with no predecessor it will be - /// marked to be removed later. - void fixPhi(BasicBlock *BB, BasicBlock *Succ); - - /// Removes phis that have no predecessor - void removePhis(); - - /// Creates constraints for Instructions. - /// If the constraint for this instruction has already been created - /// nothing is done. - void createConstraintInstruction(Instruction *I); - - /// Creates constraints for Binary Operators. - /// It will create constraints only for addition and subtraction, - /// the other binary operations are not treated by ABCD. - /// For additions in the form a = b + X and a = X + b, where X is a constant, - /// the constraint a <= b + X can be obtained. For this constraint, an edge - /// a->b with weight X is added to the lower bound graph, and an edge - /// b->a with weight -X is added to the upper bound graph. - /// Only subtractions in the format a = b - X is used by ABCD. - /// Edges are created using the same semantic as addition. - void createConstraintBinaryOperator(BinaryOperator *BO); - - /// Creates constraints for Comparator Instructions. - /// Only comparators that have any of the following operators - /// are used to create constraints: >=, >, <=, <. And only if - /// at least one operand is an Instruction. In a Comparator Instruction - /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where - /// t and f represent sigma for operands in true and false branches. The - /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and - /// b_f <= b. There are two more constraints that depend on the operator. - /// For the operator <= : a_t <= b_t and b_f <= a_f-1 - /// For the operator < : a_t <= b_t-1 and b_f <= a_f - /// For the operator >= : b_t <= a_t and a_f <= b_f-1 - /// For the operator > : b_t <= a_t-1 and a_f <= b_f - void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI); - - /// Creates constraints for PHI nodes. - /// In a PHI node a = phi(b,c) we can create the constraint - /// a<= max(b,c). With this constraint there will be the edges, - /// b->a and c->a with weight 0 in the lower bound graph, and the edges - /// a->b and a->c with weight 0 in the upper bound graph. - void createConstraintPHINode(PHINode *PN); - - /// Given a binary operator, we are only interest in the case - /// that one operand is an Instruction and the other is a ConstantInt. In - /// this case the method returns true, otherwise false. It also obtains the - /// Instruction and ConstantInt from the BinaryOperator and returns it. - bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2); - - /// This method creates a constraint between a Sigma and an Instruction. - /// These constraints are created as soon as we find a comparator that uses a - /// SSI variable. - void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f); - - /// If PN_op1 and PN_o2 are different from NULL, create a constraint - /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace - /// with the respective V_op#, if V_op# is a ConstantInt. - void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value); - - /// Returns the sigma representing the Instruction I in BasicBlock BB. - /// Returns NULL in case there is no sigma for this Instruction in this - /// Basic Block. 
This methods assume that sigmas are the first instructions - /// in a block, and that there can be only two sigmas in a block. So it will - /// only look on the first two instructions of BasicBlock BB. - PHINode *findSigma(BasicBlock *BB, Instruction *I); - - /// Original ABCD algorithm to prove redundant checks. - /// This implementation works on any kind of inequality branch. - bool demandProve(Value *a, Value *b, int c, bool upper_bound); - - /// Prove that distance between b and a is <= bound - ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level); - - /// Updates the distance value for a and b - void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level, - meet_function meet); - - InequalityGraph inequality_graph; - MemoizedResult mem_result; - DenseMap<Value*, const Bound*> active; - SmallPtrSet<Value*, 16> created; - SmallVector<PHINode *, 16> phis_to_remove; -}; - -} // end anonymous namespace. - -char ABCD::ID = 0; -static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand"); - - -bool ABCD::runOnFunction(Function &F) { - modified = false; - createSSI(F); - executeABCD(F); - DEBUG(inequality_graph.printGraph(dbgs(), F)); - removePhis(); - - inequality_graph.clear(); - mem_result.clear(); - active.clear(); - created.clear(); - phis_to_remove.clear(); - return modified; -} - -/// Iterates through all BasicBlocks, if the Terminator Instruction -/// uses an Comparator Instruction, all operands of this comparator -/// are sent to be transformed to SSI. Only Instruction operands are -/// transformed. -void ABCD::createSSI(Function &F) { - SSI *ssi = &getAnalysis<SSI>(); - - SmallVector<Instruction *, 16> Insts; - - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) { - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) { - modified = true; // XXX: but yet createSSI might do nothing - Insts.push_back(I); - } - if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) { - modified = true; - Insts.push_back(I); - } - } - } - ssi->createSSI(Insts); -} - -/// Creates the graphs for this function. -/// It will look for all comparators used in branches, and create them. -/// These comparators will create constraints for any instruction as an -/// operand. -void ABCD::executeABCD(Function &F) { - for (Function::iterator begin = F.begin(), end = F.end(); - begin != end; ++begin) { - BasicBlock *BB = begin; - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumOperands() == 0) - continue; - - ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0)); - if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy()) - continue; - - createConstraintCmpInst(ICI, TI); - seekRedundancy(ICI, TI); - } -} - -/// Seeks redundancies in the comparator instruction CI. -/// If the ABCD algorithm can prove that the comparator CI always -/// takes one way, then the Terminator Instruction TI is substituted from -/// a conditional branch to a unconditional one. -/// This code basically receives a comparator, and verifies which kind of -/// instruction it is. Depending on the kind of instruction, we use different -/// strategies to prove its redundancy. 
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) { - CmpInst::Predicate Pred = ICI->getPredicate(); - - Value *source, *dest; - int distance1, distance2; - bool upper; - - switch(Pred) { - case CmpInst::ICMP_SGT: // signed greater than - upper = false; - distance1 = 1; - distance2 = 0; - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - upper = false; - distance1 = 0; - distance2 = -1; - break; - - case CmpInst::ICMP_SLT: // signed less than - upper = true; - distance1 = -1; - distance2 = 0; - break; - - case CmpInst::ICMP_SLE: // signed less or equal - upper = true; - distance1 = 0; - distance2 = 1; - break; - - default: - return; - } - - ++NumBranchTested; - source = ICI->getOperand(0); - dest = ICI->getOperand(1); - if (demandProve(dest, source, distance1, upper)) { - removeRedundancy(TI, true); - } else if (demandProve(dest, source, distance2, !upper)) { - removeRedundancy(TI, false); - } -} - -/// Substitutes Terminator Instruction TI, that is a conditional branch, -/// with one unconditional branch. Succ_edge determines if the new -/// unconditional edge will be the first or second edge of the former TI -/// instruction. -void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) { - BasicBlock *Succ; - if (Succ_edge) { - Succ = TI->getSuccessor(0); - fixPhi(TI->getParent(), TI->getSuccessor(1)); - } else { - Succ = TI->getSuccessor(1); - fixPhi(TI->getParent(), TI->getSuccessor(0)); - } - - BranchInst::Create(Succ, TI); - TI->eraseFromParent(); // XXX: invoke - ++NumBranchRemoved; - modified = true; -} - -/// When an conditional branch is removed, the BasicBlock that is no longer -/// reachable will have problems in phi functions. This method fixes these -/// phis removing the former BasicBlock from the list of incoming BasicBlocks -/// of all phis. In case the phi remains with no predecessor it will be -/// marked to be removed later. -void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) { - BasicBlock::iterator begin = Succ->begin(); - while (PHINode *PN = dyn_cast<PHINode>(begin++)) { - PN->removeIncomingValue(BB, false); - if (PN->getNumIncomingValues() == 0) - phis_to_remove.push_back(PN); - } -} - -/// Removes phis that have no predecessor -void ABCD::removePhis() { - for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) { - PHINode *PN = phis_to_remove[i]; - PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - PN->eraseFromParent(); - } -} - -/// Creates constraints for Instructions. -/// If the constraint for this instruction has already been created -/// nothing is done. -void ABCD::createConstraintInstruction(Instruction *I) { - // Test if this instruction has not been created before - if (created.insert(I)) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - createConstraintBinaryOperator(BO); - } else if (PHINode *PN = dyn_cast<PHINode>(I)) { - createConstraintPHINode(PN); - } - } -} - -/// Creates constraints for Binary Operators. -/// It will create constraints only for addition and subtraction, -/// the other binary operations are not treated by ABCD. -/// For additions in the form a = b + X and a = X + b, where X is a constant, -/// the constraint a <= b + X can be obtained. For this constraint, an edge -/// a->b with weight X is added to the lower bound graph, and an edge -/// b->a with weight -X is added to the upper bound graph. -/// Only subtractions in the format a = b - X is used by ABCD. -/// Edges are created using the same semantic as addition. 
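-/// For instance, "a = add i32 %b, 5" yields the edge a->b with weight 5 in
-/// the lower bound graph and the edge b->a with weight -5 in the upper
-/// bound graph, both encoding a <= b + 5.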
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) { - Instruction *I1 = NULL, *I2 = NULL; - ConstantInt *CI1 = NULL, *CI2 = NULL; - - // Test if an operand is an Instruction and the other is a Constant - if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2)) - return; - - Instruction *I = 0; - APInt value; - - switch (BO->getOpcode()) { - case Instruction::Add: - if (I1) { - I = I1; - value = CI2->getValue(); - } else if (I2) { - I = I2; - value = CI1->getValue(); - } - break; - - case Instruction::Sub: - // Instructions like a = X-b, where X is a constant are not represented - // in the graph. - if (!I1) - return; - - I = I1; - value = -CI2->getValue(); - break; - - default: - return; - } - - inequality_graph.addEdge(I, BO, value, true); - inequality_graph.addEdge(BO, I, -value, false); - createConstraintInstruction(I); -} - -/// Given a binary operator, we are only interest in the case -/// that one operand is an Instruction and the other is a ConstantInt. In -/// this case the method returns true, otherwise false. It also obtains the -/// Instruction and ConstantInt from the BinaryOperator and returns it. -bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1, - Instruction **I2, ConstantInt **C1, - ConstantInt **C2) { - Value *op1 = BO->getOperand(0); - Value *op2 = BO->getOperand(1); - - if ((*I1 = dyn_cast<Instruction>(op1))) { - if ((*C2 = dyn_cast<ConstantInt>(op2))) - return true; // First is Instruction and second ConstantInt - - return false; // Both are Instruction - } else { - if ((*C1 = dyn_cast<ConstantInt>(op1)) && - (*I2 = dyn_cast<Instruction>(op2))) - return true; // First is ConstantInt and second Instruction - - return false; // Both are not Instruction - } -} - -/// Creates constraints for Comparator Instructions. -/// Only comparators that have any of the following operators -/// are used to create constraints: >=, >, <=, <. And only if -/// at least one operand is an Instruction. In a Comparator Instruction -/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where -/// t and f represent sigma for operands in true and false branches. The -/// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and -/// b_f <= b. There are two more constraints that depend on the operator. 
-/// For the operator <= : a_t <= b_t and b_f <= a_f-1 -/// For the operator < : a_t <= b_t-1 and b_f <= a_f -/// For the operator >= : b_t <= a_t and a_f <= b_f-1 -/// For the operator > : b_t <= a_t-1 and a_f <= b_f -void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { - Value *V_op1 = ICI->getOperand(0); - Value *V_op2 = ICI->getOperand(1); - - if (!V_op1->getType()->isIntegerTy()) - return; - - Instruction *I_op1 = dyn_cast<Instruction>(V_op1); - Instruction *I_op2 = dyn_cast<Instruction>(V_op2); - - // Test if at least one operand is an Instruction - if (!I_op1 && !I_op2) - return; - - BasicBlock *BB_succ_t = TI->getSuccessor(0); - BasicBlock *BB_succ_f = TI->getSuccessor(1); - - PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL, - *SIG_op2_t = NULL, *SIG_op2_f = NULL; - - createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f); - createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f); - - int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth(); - APInt MinusOne = APInt::getAllOnesValue(width); - APInt Zero = APInt::getNullValue(width); - - CmpInst::Predicate Pred = ICI->getPredicate(); - ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1); - ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2); - switch (Pred) { - case CmpInst::ICMP_SGT: // signed greater than - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero); - break; - - case CmpInst::ICMP_SGE: // signed greater or equal - createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero); - createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne); - break; - - case CmpInst::ICMP_SLT: // signed less than - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero); - break; - - case CmpInst::ICMP_SLE: // signed less or equal - createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero); - createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne); - break; - - default: - break; - } - - if (I_op1) - createConstraintInstruction(I_op1); - if (I_op2) - createConstraintInstruction(I_op2); -} - -/// Creates constraints for PHI nodes. -/// In a PHI node a = phi(b,c) we can create the constraint -/// a<= max(b,c). With this constraint there will be the edges, -/// b->a and c->a with weight 0 in the lower bound graph, and the edges -/// a->b and a->c with weight 0 in the upper bound graph. -void ABCD::createConstraintPHINode(PHINode *PN) { - // FIXME: We really want to disallow sigma nodes, but I don't know the best - // way to detect the other than this. - if (PN->getNumOperands() == 2) return; - - int32_t width = cast<IntegerType>(PN->getType())->getBitWidth(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (Instruction *I = dyn_cast<Instruction>(V)) { - createConstraintInstruction(I); - } - inequality_graph.addEdge(V, PN, APInt(width, 0), true); - inequality_graph.addEdge(V, PN, APInt(width, 0), false); - } -} - -/// This method creates a constraint between a Sigma and an Instruction. -/// These constraints are created as soon as we find a comparator that uses a -/// SSI variable. 
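-/// (For instance, if %a has sigmas %a_t and %a_f in the true and false
-/// successors, this adds zero-weight edges encoding a_t <= a and a_f <= a.)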
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t, - BasicBlock *BB_succ_f, PHINode **SIG_op_t, - PHINode **SIG_op_f) { - *SIG_op_t = findSigma(BB_succ_t, I_op); - *SIG_op_f = findSigma(BB_succ_f, I_op); - - if (*SIG_op_t) { - int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false); - } - if (*SIG_op_f) { - int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth(); - inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true); - inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false); - } -} - -/// If PN_op1 and PN_o2 are different from NULL, create a constraint -/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace -/// with the respective V_op#, if V_op# is a ConstantInt. -void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, - ConstantInt *V_op1, ConstantInt *V_op2, - APInt value) { - if (SIG_op1 && SIG_op2) { - inequality_graph.addEdge(SIG_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false); - } else if (SIG_op1 && V_op2) { - inequality_graph.addEdge(V_op2, SIG_op1, value, true); - inequality_graph.addEdge(SIG_op1, V_op2, -value, false); - } else if (SIG_op2 && V_op1) { - inequality_graph.addEdge(SIG_op2, V_op1, value, true); - inequality_graph.addEdge(V_op1, SIG_op2, -value, false); - } -} - -/// Returns the sigma representing the Instruction I in BasicBlock BB. -/// Returns NULL in case there is no sigma for this Instruction in this -/// Basic Block. This methods assume that sigmas are the first instructions -/// in a block, and that there can be only two sigmas in a block. So it will -/// only look on the first two instructions of BasicBlock BB. -PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) { - // BB has more than one predecessor, BB cannot have sigmas. - if (I == NULL || BB->getSinglePredecessor() == NULL) - return NULL; - - BasicBlock::iterator begin = BB->begin(); - BasicBlock::iterator end = BB->end(); - - for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) { - Instruction *I_succ = begin; - if (PHINode *PN = dyn_cast<PHINode>(I_succ)) - if (PN->getIncomingValue(0) == I) - return PN; - } - - return NULL; -} - -/// Original ABCD algorithm to prove redundant checks. -/// This implementation works on any kind of inequality branch. 
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) { - int32_t width = cast<IntegerType>(a->getType())->getBitWidth(); - Bound bound(APInt(width, c), upper_bound); - - mem_result.clear(); - active.clear(); - - ProveResult res = prove(a, b, bound, 0); - return res != False; -} - -/// Prove that distance between b and a is <= bound -ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound, - unsigned level) { - // if (C[b-a<=e] == True for some e <= bound - // Same or stronger difference was already proven - if (mem_result.hasTrue(b, bound)) - return True; - - // if (C[b-a<=e] == False for some e >= bound - // Same or weaker difference was already disproved - if (mem_result.hasFalse(b, bound)) - return False; - - // if (C[b-a<=e] == Reduced for some e <= bound - // b is on a cycle that was reduced for same or stronger difference - if (mem_result.hasReduced(b, bound)) - return Reduced; - - // traversal reached the source vertex - if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true))) - return True; - - // if b has no predecessor then fail - if (!inequality_graph.hasEdge(b, bound.isUpperBound())) - return False; - - // a cycle was encountered - if (active.count(b)) { - if (Bound::leq(*active.lookup(b), bound)) - return Reduced; // a "harmless" cycle - - return False; // an amplifying cycle - } - - active[b] = &bound; - PHINode *PN = dyn_cast<PHINode>(b); - - // Test if a Value is a Phi. If it is a PHINode with more than 1 incoming - // value, then it is a phi, if it has 1 incoming value it is a sigma. - if (PN && PN->getNumIncomingValues() > 1) - updateMemDistance(a, b, bound, level, min); - else - updateMemDistance(a, b, bound, level, max); - - active.erase(b); - - ABCD::ProveResult res = mem_result.getBoundResult(b, bound); - return res; -} - -/// Updates the distance value for a and b -void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound, - unsigned level, meet_function meet) { - ABCD::ProveResult res = (meet == max) ? 
False : True; - - SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b); - SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end(); - - for (; begin != end ; ++begin) { - if (((res >= Reduced) && (meet == max)) || - ((res == False) && (meet == min))) { - break; - } - const Edge &in = *begin; - if (in.isUpperBound() == bound.isUpperBound()) { - Value *succ = in.getVertex(); - res = meet(res, prove(a, succ, Bound(bound, in.getValue()), - level+1)); - } - } - - mem_result.updateBound(b, bound, res); -} - -/// Return the stored result for this bound -ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{ - if (max_false && Bound::leq(bound, *max_false)) - return False; - if (min_true && Bound::leq(*min_true, bound)) - return True; - if (min_reduced && Bound::leq(*min_reduced, bound)) - return Reduced; - return False; -} - -/// Stores a false found -void ABCD::MemoizedResultChart::addFalse(const Bound &bound) { - if (!max_false || Bound::leq(*max_false, bound)) - max_false.reset(new Bound(bound)); - - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced))); - if (Bound::eq(max_false.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), min_true.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a true found -void ABCD::MemoizedResultChart::addTrue(const Bound &bound) { - if (!min_true || Bound::leq(bound, *min_true)) - min_true.reset(new Bound(bound)); - - if (Bound::eq(min_true.get(), min_reduced.get())) - min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced))); - if (Bound::eq(min_true.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); - if (Bound::eq(max_false.get(), min_reduced.get())) - min_reduced.reset(); - clearRedundantReduced(); -} - -/// Stores a Reduced found -void ABCD::MemoizedResultChart::addReduced(const Bound &bound) { - if (!min_reduced || Bound::leq(bound, *min_reduced)) - min_reduced.reset(new Bound(bound)); - - if (Bound::eq(min_reduced.get(), min_true.get())) - min_true.reset(new Bound(Bound::createIncrement(*min_true))); - if (Bound::eq(min_reduced.get(), max_false.get())) - max_false.reset(new Bound(Bound::createDecrement(*max_false))); -} - -/// Clears redundant reduced -/// If a min_true is smaller than a min_reduced then the min_reduced -/// is unnecessary and then removed. It also works for min_reduced -/// begin smaller than max_false. 
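// The prover above folds child results through a three-valued lattice with
// False < Reduced < True: phi nodes take the min over their incoming proofs,
// sigma nodes the max, and each fold starts from the meet's identity element
// ("res = (meet == max) ? False : True"). A standalone sketch of that
// lattice, reconstructed from the code above rather than quoted from it:
//
//   enum ProveResult { False = 0, Reduced = 1, True = 2 };
//   typedef ProveResult (*meet_function)(ProveResult, ProveResult);
//   static ProveResult max(ProveResult a, ProveResult b) { return a > b ? a : b; }
//   static ProveResult min(ProveResult a, ProveResult b) { return a < b ? a : b; }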
-void ABCD::MemoizedResultChart::clearRedundantReduced() { - if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced)) - min_reduced.reset(); - if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false)) - min_reduced.reset(); -} - -/// Stores the bound found -void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound, - const ProveResult res) { - if (res == False) { - map[b].addFalse(bound); - } else if (res == True) { - map[b].addTrue(bound); - } else { - map[b].addReduced(bound); - } -} - -/// Adds an edge from V_from to V_to with weight value -void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from, - APInt value, bool upper) { - assert(V_from->getType() == V_to->getType()); - assert(cast<IntegerType>(V_from->getType())->getBitWidth() == - value.getBitWidth()); - - graph[V_from].push_back(Edge(V_to, value, upper)); -} - -/// Test if there is any edge from V in the upper direction -bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const { - SmallVector<Edge, 16> it = graph.lookup(V); - - SmallVector<Edge, 16>::iterator begin = it.begin(); - SmallVector<Edge, 16>::iterator end = it.end(); - for (; begin != end; ++begin) { - if (begin->isUpperBound() == upper) { - return true; - } - } - return false; -} - -/// Prints the header of the dot file -void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const { - OS << "digraph dotgraph {\n"; - OS << "label=\"Inequality Graph for \'"; - OS << F.getNameStr() << "\' function\";\n"; - OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n"; -} - -/// Prints the body of the dot file -void ABCD::InequalityGraph::printBody(raw_ostream &OS) const { - DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin = - graph.begin(), end = graph.end(); - - for (; begin != end ; ++begin) { - SmallVector<Edge, 16>::const_iterator begin_par = - begin->second.begin(), end_par = begin->second.end(); - Value *source = begin->first; - - printVertex(OS, source); - - for (; begin_par != end_par ; ++begin_par) { - const Edge &edge = *begin_par; - printEdge(OS, source, edge); - } - } -} - -/// Prints vertex source to the dot file -/// -void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const { - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " [label=\"{"; - printName(OS, source); - OS << "}\"];\n"; -} - -/// Prints the edge to the dot file -void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source, - const Edge &edge) const { - Value *dest = edge.getVertex(); - APInt value = edge.getValue(); - bool upper = edge.isUpperBound(); - - OS << "\""; - printName(OS, source); - OS << "\""; - OS << " -> "; - OS << "\""; - printName(OS, dest); - OS << "\""; - OS << " [label=\"" << value << "\""; - if (upper) { - OS << "color=\"blue\""; - } else { - OS << "color=\"red\""; - } - OS << "];\n"; -} - -void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const { - if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) { - OS << *CI; - } else { - if (!info->hasName()) { - info->setName("V"); - } - OS << info->getNameStr(); - } -} - -/// createABCDPass - The public interface to this file... 
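// For reference, the printHeader/printVertex/printEdge routines above emit
// GraphViz text along these lines for a vertex "a" with an upper-bound edge
// of weight 0 to "b" (an assumed example, not output captured from a run;
// note that label and color are written back to back, with no separator,
// exactly as printEdge streams them):
//
//   digraph dotgraph {
//   label="Inequality Graph for 'foo' function";
//   node [shape=record,fontname="Times-Roman",fontsize=14];
//   "a" [label="{a}"];
//   "a" -> "b" [label="0"color="blue"];
//   }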
-FunctionPass *llvm::createABCDPass() { - return new ABCD(); -} diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 2d19467ce7462..ada086e9db766 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed"); namespace { struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(&ID) {} + ADCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function& F); @@ -45,7 +45,7 @@ namespace { } char ADCE::ID = 0; -static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination"); +INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false); bool ADCE::runOnFunction(Function& F) { SmallPtrSet<Instruction*, 128> alive; diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index 54533f50405f3..b144678c6a0ed 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BlockPlacement() : FunctionPass(&ID) {} + BlockPlacement() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -74,8 +74,8 @@ namespace { } char BlockPlacement::ID = 0; -static RegisterPass<BlockPlacement> -X("block-placement", "Profile Guided Basic Block Placement"); +INITIALIZE_PASS(BlockPlacement, "block-placement", + "Profile Guided Basic Block Placement", false, false); FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 1a3b10cc9baaf..b7598eace536b 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,9 +1,9 @@ add_llvm_library(LLVMScalarOpts - ABCD.cpp ADCE.cpp BasicBlockPlacement.cpp CodeGenPrepare.cpp ConstantProp.cpp + CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp GEPSplitter.cpp @@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts LoopStrengthReduce.cpp LoopUnrollPass.cpp LoopUnswitch.cpp + LowerAtomic.cpp MemCpyOptimizer.cpp Reassociate.cpp Reg2Mem.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 272066c8c0c4b..e07b761e589c1 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -41,6 +42,11 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt<bool> +CriticalEdgeSplit("cgp-critical-edge-splitting", + cl::desc("Split critical edges during codegen prepare"), + cl::init(true), cl::Hidden); + namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -54,7 +60,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) - : FunctionPass(&ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ 
-82,8 +88,8 @@ namespace { } char CodeGenPrepare::ID = 0; -static RegisterPass<CodeGenPrepare> X("codegenprepare", - "Optimize for code generation"); +INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false); FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); @@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. @@ -548,9 +554,9 @@ protected: CI->eraseFromParent(); } bool isFoldable(unsigned SizeCIOp, unsigned, bool) const { - if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - - CallInst::ArgOffset))) - return SizeCI->isAllOnesValue(); + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) + return SizeCI->isAllOnesValue(); return false; } }; @@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool MadeChange = false; // Split all critical edges where the dest block has a PHI. - TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); + if (CriticalEdgeSplit) { + TerminatorInst *BBTI = BB.getTerminator(); + if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { + BasicBlock *SuccBB = BBTI->getSuccessor(i); + if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, BackEdges, this); + } } } diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index ea208135739d5..a0ea369d0cadd 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid - ConstantPropagation() : FunctionPass(&ID) {} + ConstantPropagation() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -45,8 +45,8 @@ namespace { } char ConstantPropagation::ID = 0; -static RegisterPass<ConstantPropagation> -X("constprop", "Simple constant propagation"); +INITIALIZE_PASS(ConstantPropagation, "constprop", + "Simple constant propagation", false, false); FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp new file mode 100644 index 0000000000000..0d4e45de34664 --- /dev/null +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -0,0 +1,200 @@ +//===- CorrelatedValuePropagation.cpp - 
Propagate CFG-derived info --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Correlated Value Propagation pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "correlated-value-propagation" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPhis, "Number of phis propagated"); +STATISTIC(NumSelects, "Number of selects propagated"); +STATISTIC(NumMemAccess, "Number of memory access targets propagated"); +STATISTIC(NumCmps, "Number of comparisons propagated"); + +namespace { + class CorrelatedValuePropagation : public FunctionPass { + LazyValueInfo *LVI; + + bool processSelect(SelectInst *SI); + bool processPHI(PHINode *P); + bool processMemAccess(Instruction *I); + bool processCmp(CmpInst *C); + + public: + static char ID; + CorrelatedValuePropagation(): FunctionPass(ID) { } + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LazyValueInfo>(); + } + }; +} + +char CorrelatedValuePropagation::ID = 0; +INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation", + "Value Propagation", false, false); + +// Public interface to the Value Propagation pass +Pass *llvm::createCorrelatedValuePropagationPass() { + return new CorrelatedValuePropagation(); +} + +bool CorrelatedValuePropagation::processSelect(SelectInst *S) { + if (S->getType()->isVectorTy()) return false; + if (isa<Constant>(S->getOperand(0))) return false; + + Constant *C = LVI->getConstant(S->getOperand(0), S->getParent()); + if (!C) return false; + + ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return false; + + S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2)); + S->eraseFromParent(); + + ++NumSelects; + + return true; +} + +bool CorrelatedValuePropagation::processPHI(PHINode *P) { + bool Changed = false; + + BasicBlock *BB = P->getParent(); + for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { + Value *Incoming = P->getIncomingValue(i); + if (isa<Constant>(Incoming)) continue; + + Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), + P->getIncomingBlock(i), + BB); + if (!C) continue; + + P->setIncomingValue(i, C); + Changed = true; + } + + if (Value *ConstVal = P->hasConstantValue()) { + P->replaceAllUsesWith(ConstVal); + P->eraseFromParent(); + Changed = true; + } + + ++NumPhis; + + return Changed; +} + +bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { + Value *Pointer = 0; + if (LoadInst *L = dyn_cast<LoadInst>(I)) + Pointer = L->getPointerOperand(); + else + Pointer = cast<StoreInst>(I)->getPointerOperand(); + + if (isa<Constant>(Pointer)) return false; + + Constant *C = LVI->getConstant(Pointer, I->getParent()); + if (!C) return false; + + ++NumMemAccess; + I->replaceUsesOfWith(Pointer, C); + return true; +} + +/// processCmp - If the value of this comparison could be determined locally, +/// constant propagation would already have figured it out. 
Instead, walk +/// the predecessors and statically evaluate the comparison based on information +/// available on that edge. If a given static evaluation is true on ALL +/// incoming edges, then it's true universally and we can simplify the compare. +bool CorrelatedValuePropagation::processCmp(CmpInst *C) { + Value *Op0 = C->getOperand(0); + if (isa<Instruction>(Op0) && + cast<Instruction>(Op0)->getParent() == C->getParent()) + return false; + + Constant *Op1 = dyn_cast<Constant>(C->getOperand(1)); + if (!Op1) return false; + + pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); + if (PI == PE) return false; + + LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Result == LazyValueInfo::Unknown) return false; + + ++PI; + while (PI != PE) { + LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), + C->getOperand(0), Op1, *PI, C->getParent()); + if (Res != Result) return false; + ++PI; + } + + ++NumCmps; + + if (Result == LazyValueInfo::True) + C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); + else + C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); + + C->eraseFromParent(); + + return true; +} + +bool CorrelatedValuePropagation::runOnFunction(Function &F) { + LVI = &getAnalysis<LazyValueInfo>(); + + bool FnChanged = false; + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + bool BBChanged = false; + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { + Instruction *II = BI++; + switch (II->getOpcode()) { + case Instruction::Select: + BBChanged |= processSelect(cast<SelectInst>(II)); + break; + case Instruction::PHI: + BBChanged |= processPHI(cast<PHINode>(II)); + break; + case Instruction::ICmp: + case Instruction::FCmp: + BBChanged |= processCmp(cast<CmpInst>(II)); + break; + case Instruction::Load: + case Instruction::Store: + BBChanged |= processMemAccess(II); + break; + } + } + + // Propagating correlated values might leave cruft around. + // Try to clean it up before we continue. 
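// The pass's power comes from two LazyValueInfo queries: getConstant(V, BB)
// for facts that hold throughout a block, and getConstantOnEdge(V, Pred, BB)
// for facts implied by taking a particular CFG edge. A hypothetical
// illustration of the edge form used by processPHI and processCmp above:
//
//   // given:  br i1 %cmp, label %then, label %else   with %cmp = (x == 7)
//   Constant *C = LVI->getConstantOnEdge(X, PredBB, ThenBB);
//   // C is the ConstantInt 7 along the %then edge, and null on edges where
//   // nothing is known; null means "undetermined", not "undef".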
+ if (BBChanged) + SimplifyInstructionsInBlock(FI); + + FnChanged |= BBChanged; + } + + return FnChanged; +} diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 39940c35da5d5..87ea8038356ae 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -35,7 +35,7 @@ namespace { // struct DeadInstElimination : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - DeadInstElimination() : BasicBlockPass(&ID) {} + DeadInstElimination() : BasicBlockPass(ID) {} virtual bool runOnBasicBlock(BasicBlock &BB) { bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { @@ -56,8 +56,8 @@ namespace { } char DeadInstElimination::ID = 0; -static RegisterPass<DeadInstElimination> -X("die", "Dead Instruction Elimination"); +INITIALIZE_PASS(DeadInstElimination, "die", + "Dead Instruction Elimination", false, false); Pass *llvm::createDeadInstEliminationPass() { return new DeadInstElimination(); @@ -70,7 +70,7 @@ namespace { // struct DCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid - DCE() : FunctionPass(&ID) {} + DCE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -81,7 +81,7 @@ namespace { } char DCE::ID = 0; -static RegisterPass<DCE> Y("dce", "Dead Code Elimination"); +INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false); bool DCE::runOnFunction(Function &F) { // Start out with all of the instructions in the worklist... diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index e047e4ffa151c..c8fd9d9fa5561 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -40,7 +40,7 @@ namespace { TargetData *TD; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(&ID) {} + DSE() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F) { bool Changed = false; @@ -82,7 +82,7 @@ namespace { } char DSE::ID = 0; -static RegisterPass<DSE> X("dse", "Dead Store Elimination"); +INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false); FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } @@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } continue; - } else if (CallSite::get(BBI).getInstruction() != 0) { + } else if (CallSite CS = cast<Value>(BBI)) { // If this call does not access memory, it can't // be undeadifying any of our pointers. 
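// A pattern that repeats across every file in this section: pass IDs are now
// handed to the FunctionPass constructor by value rather than by address, and
// the static RegisterPass<> objects become INITIALIZE_PASS macros. A minimal
// pass written against the new convention (a hypothetical example, not a pass
// from this commit):

namespace {
  struct ExamplePass : public FunctionPass {
    static char ID;  // Pass identification, replacement for typeid
    ExamplePass() : FunctionPass(ID) {}
    virtual bool runOnFunction(Function &F) { return false; }
  };
}

char ExamplePass::ID = 0;
INITIALIZE_PASS(ExamplePass, "example-pass", "Example Pass", false, false);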
- CallSite CS = CallSite::get(BBI); if (AA.doesNotAccessMemory(CS)) continue; diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp index 610a41dae44b1..53dd06d24bb5b 100644 --- a/lib/Transforms/Scalar/GEPSplitter.cpp +++ b/lib/Transforms/Scalar/GEPSplitter.cpp @@ -27,13 +27,13 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const; public: static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(&ID) {} + explicit GEPSplitter() : FunctionPass(ID) {} }; } char GEPSplitter::ID = 0; -static RegisterPass<GEPSplitter> X("split-geps", - "split complex GEPs into simple GEPs"); +INITIALIZE_PASS(GEPSplitter, "split-geps", + "split complex GEPs into simple GEPs", false, false); FunctionPass *llvm::createGEPSplitterPass() { return new GEPSplitter(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 88b67768fa5df..c62ce1f27f647 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -165,7 +165,6 @@ namespace { Expression create_expression(CastInst* C); Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); - Expression create_expression(Constant* C); Expression create_expression(ExtractValueInst* C); Expression create_expression(InsertValueInst* C); @@ -665,7 +664,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(&ID), NoLoads(noloads), MD(0) { } + : FunctionPass(ID), NoLoads(noloads), MD(0) { } private: bool NoLoads; @@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { return new GVN(NoLoads); } -static RegisterPass<GVN> X("gvn", - "Global Value Numbering"); +INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false); void GVN::dump(DenseMap<uint32_t, Value*>& d) { errs() << "{\n"; @@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) { for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) - if (PHINode* use_phi = dyn_cast<PHINode>(UI)) + if (PHINode* use_phi = dyn_cast<PHINode>(*UI)) if (use_phi->getParent() == inst->getParent()) return false; @@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // Otherwise, we have to construct SSA form. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); - SSAUpdate.Initialize(LI); + SSAUpdate.Initialize(LI->getType(), LI->getName()); const Type *LoadTy = LI->getType(); @@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa<DbgInfoIntrinsic>(CurInst)) continue; + + // We don't currently value number ANY inline asm calls. 
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) + if (CallI->isInlineAsm()) + continue; uint32_t ValNo = VN.lookup(CurInst); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b5c9dd881df86..af2eafc47cbf3 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -77,7 +77,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(&ID) {} + IndVarSimplify() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -102,7 +102,7 @@ namespace { void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); @@ -117,8 +117,8 @@ namespace { } char IndVarSimplify::ID = 0; -static RegisterPass<IndVarSimplify> -X("indvars", "Canonicalize Induction Variables"); +INITIALIZE_PASS(IndVarSimplify, "indvars", + "Canonicalize Induction Variables", false, false); Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// is actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - Value *IndVar, + PHINode *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter) { @@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = L->getCanonicalInductionVariableIncrement(); + CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock); } else { // We have to use the preincremented value... RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, @@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. - Value *IndVar = 0; + PHINode *IndVar = 0; if (NeedCannIV) { // Check to see if the loop already has any canonical-looking induction // variables. If any are present and wider than the planned canonical @@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // Check Incr uses. One user is PN and the other user is an exit condition // used by the conditional terminator. Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(IncrUse++); + Instruction *U1 = cast<Instruction>(*IncrUse++); if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(IncrUse++); + Instruction *U2 = cast<Instruction>(*IncrUse++); if (IncrUse != Incr->use_end()) return; // Find exit condition, which is an fcmp. 
If it doesn't exist, or if it isn't diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index edce14cd92eaf..104d5aecbdd32 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -45,7 +46,10 @@ Threshold("jump-threading-threshold", // Turn on use of LazyValueInfo. static cl::opt<bool> -EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden); +EnableLVI("enable-jump-threading-lvi", + cl::desc("Use LVI for jump threading"), + cl::init(true), + cl::ReallyHidden); @@ -74,15 +78,32 @@ namespace { #else SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders; #endif + DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet; + + // RAII helper for updating the recursion stack. + struct RecursionSetRemover { + DenseSet<std::pair<Value*, BasicBlock*> > &TheSet; + std::pair<Value*, BasicBlock*> ThePair; + + RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S, + std::pair<Value*, BasicBlock*> P) + : TheSet(S), ThePair(P) { } + + ~RecursionSetRemover() { + TheSet.erase(ThePair); + } + }; public: static char ID; // Pass identification - JumpThreading() : FunctionPass(&ID) {} + JumpThreading() : FunctionPass(ID) {} bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - if (EnableLVI) + if (EnableLVI) { AU.addRequired<LazyValueInfo>(); + AU.addPreserved<LazyValueInfo>(); + } } void FindLoopHeaders(Function &F); @@ -111,8 +132,8 @@ namespace { } char JumpThreading::ID = 0; -static RegisterPass<JumpThreading> -X("jump-threading", "Jump Threading"); +INITIALIZE_PASS(JumpThreading, "jump-threading", + "Jump Threading", false, false); // Public interface to the Jump Threading pass FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } @@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName() << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); + if (LVI) LVI->eraseBlock(BB); DeleteDeadBlock(BB); Changed = true; } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { @@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) { bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); BasicBlock *Succ = BI->getSuccessor(0); + // FIXME: It is always conservatively correct to drop the info + // for a block even if it doesn't get erased. This isn't totally + // awesome, but it allows us to use AssertingVH to prevent nasty + // dangling pointer issues within LazyValueInfo. + if (LVI) LVI->eraseBlock(BB); if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; // If we deleted BB and BB was the header of a loop, then the @@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) { LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second)); } +// Helper method for ComputeValueKnownInPredecessors. If Value is a +// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing. 
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+                                                  BasicBlock*> > &Result,
+                                   Constant *Value, BasicBlock* BB){
+  if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+    Result.push_back(std::make_pair(FoldedCInt, BB));
+  else if (isa<UndefValue>(Value))
+    Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
 /// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
 /// if we can infer that the value is a known ConstantInt in any of our
 /// predecessors. If so, return the known list of value and pred BB in the
 ///
 bool JumpThreading::
 ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+  // This method walks up use-def chains recursively. Because of this, we could
+  // get into an infinite loop going around loops in the use-def chain. To
+  // prevent this, keep track of what (value, block) pairs we've already visited
+  // and terminate the search if we loop back to them.
+  if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+    return false;
+
+  // An RAII helper to remove this pair from the recursion set once the
+  // recursion stack pops back out again.
+  RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
   // If V is a constantint, then it is known in all predecessors.
   if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
     ConstantInt *CI = dyn_cast<ConstantInt>(V);
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
       Result.push_back(std::make_pair(CI, *PI));
+
     return true;
   }
@@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
       if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
         ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
         Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+      } else if (LVI) {
+        Constant *CI = LVI->getConstantOnEdge(InVal,
+                                              PN->getIncomingBlock(i), BB);
+        // LVI returns null if no value could be determined.
+        if (!CI) continue;
+        PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
       }
     }
+
     return !Result.empty();
   }
@@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
     else
       InterestingVal = ConstantInt::getFalse(I->getContext());
+
+    SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
     // Scan for the sentinel. If we find an undef, force it to the
     // interesting value: x|undef -> true and x&undef -> false.
     for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
       if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
         Result.push_back(LHSVals[i]);
         Result.back().first = InterestingVal;
+        LHSKnownBBs.insert(LHSVals[i].second);
       }
     for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
       if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
         // If we already inferred a value for this block on the LHS, don't
         // re-add it.
-        bool HasValue = false;
-        for (unsigned r = 0, e = Result.size(); r != e; ++r)
-          if (Result[r].second == RHSVals[i].second) {
-            HasValue = true;
-            break;
-          }
-
-        if (!HasValue) {
+        if (!LHSKnownBBs.count(RHSVals[i].second)) {
           Result.push_back(RHSVals[i]);
           Result.back().first = InterestingVal;
         }
       }
+
     return !Result.empty();
   }
@@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
       if (Result[i].first)
         Result[i].first =
           cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
       return true;
     }
+
+    // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals); + + // Try to use constant folding to simplify the binary operator. + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? LHSVals[i].first : + cast<Constant>(UndefValue::get(BO->getType())); + Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI); + + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + } + + return !Result.empty(); } // Handle compare with phi operand, where the PHI is defined in this block. @@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT); } - if (isa<UndefValue>(Res)) - Result.push_back(std::make_pair((ConstantInt*)0, PredBB)); - else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res)) - Result.push_back(std::make_pair(CI, PredBB)); + if (Constant *ConstRes = dyn_cast<Constant>(Res)) + PushConstantIntOrUndef(Result, ConstRes, PredBB); } return !Result.empty(); @@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isIntegerTy() && // Not vector compare. - (!isa<Instruction>(Cmp->getOperand(0)) || - cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { - Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + Cmp->getType()->isIntegerTy()) { + if (!isa<Instruction>(Cmp->getOperand(0)) || + cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) { + Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){ + BasicBlock *P = *PI; + // If the value is known by LazyValueInfo to be a constant in a + // predecessor, use that information to try to thread this block. + LazyValueInfo::Tristate Res = + LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), + RHSCst, P, BB); + if (Res == LazyValueInfo::Unknown) + continue; - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - BasicBlock *P = *PI; - // If the value is known by LazyValueInfo to be a constant in a - // predecessor, use that information to try to thread this block. - LazyValueInfo::Tristate - Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, P, BB); - if (Res == LazyValueInfo::Unknown) - continue; + Constant *ResC = ConstantInt::get(Cmp->getType(), Res); + Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + } - Constant *ResC = ConstantInt::get(Cmp->getType(), Res); - Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P)); + return !Result.empty(); } - - return !Result.empty(); + + // Try to find a constant value for the LHS of a comparison, + // and evaluate it statically if we can. + if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) { + SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals); + + for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) { + Constant *V = LHSVals[i].first ? 
LHSVals[i].first : + cast<Constant>(UndefValue::get(CmpConst->getType())); + Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + V, CmpConst); + PushConstantIntOrUndef(Result, Folded, LHSVals[i].second); + } + + return !Result.empty(); + } + } + } + + if (LVI) { + // If all else fails, see if LVI can figure out a constant value for us. + Constant *CI = LVI->getConstant(V, BB); + ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI); + if (CInt) { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Result.push_back(std::make_pair(CInt, *PI)); } + + return !Result.empty(); } + return false; } @@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + if (LVI) LVI->eraseBlock(SinglePred); MergeBasicBlockIntoOnlyPred(BB); if (isEntry && BB != &BB->getParent()->getEntryBlock()) @@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } } } + + // For a comparison where the LHS is outside this block, it's possible + // that we've branched on it before. Used LVI to see if we can simplify + // the branch based on that. + BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); + Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE && + (!isa<Instruction>(CondCmp->getOperand(0)) || + cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) { + // For predecessor edge, determine if the comparison is true or false + // on that edge. If they're all true or all false, we can simplify the + // branch. + // FIXME: We could handle mixed true/false by duplicating code. + LazyValueInfo::Tristate Baseline = + LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Baseline != LazyValueInfo::Unknown) { + // Check that all remaining incoming values match the first one. + while (++PI != PE) { + LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge( + CondCmp->getPredicate(), + CondCmp->getOperand(0), + CondConst, *PI, BB); + if (Ret != Baseline) break; + } + + // If we terminated early, then one of the values didn't match. + if (PI == PE) { + unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0; + unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1; + RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD); + BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); + CondBr->eraseFromParent(); + return true; + } + } + } } // Check for some cases that are worth simplifying. Right now we want to look @@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) { SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues; if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues)) return false; + assert(!PredValues.empty() && "ComputeValueKnownInPredecessors returned true with no values"); @@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, << ", across block:\n " << *BB << "\n"); + if (LVI) + LVI->threadEdge(PredBB, BB, SuccBB); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. 
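// The block above folds a conditional branch when every predecessor edge
// yields the same known comparison result. The agreement test, condensed into
// a sketch (a hypothetical helper distilled from the code above, not part of
// the commit):

static LazyValueInfo::Tristate
getAgreedPredicateOnEdges(LazyValueInfo &LVI, CmpInst *Cmp, Constant *RHS,
                          BasicBlock *BB) {
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
  if (PI == PE)
    return LazyValueInfo::Unknown;

  // Establish a baseline from the first predecessor edge.
  LazyValueInfo::Tristate Baseline =
    LVI.getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), RHS,
                           *PI, BB);

  // Every remaining edge must agree, otherwise the branch is not foldable.
  for (++PI; PI != PE; ++PI)
    if (LVI.getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), RHS,
                               *PI, BB) != Baseline)
      return LazyValueInfo::Unknown;

  return Baseline;
}

// A True or False result picks which successor to keep; Unknown means the
// branch is left alone.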
@@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); @@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // We found a use of I outside of BB. Rename all uses of I that are outside // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. - SSAUpdate.Initialize(I); + SSAUpdate.Initialize(I->getType(), I->getName()); SSAUpdate.AddAvailableValue(BB, I); SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 73473952912ee..2ef85446bd9ba 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -26,8 +26,7 @@ // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use -// the mem2reg functionality to construct the appropriate SSA form for the -// variable. +// the SSAUpdater to construct the appropriate SSA form for the value. // //===----------------------------------------------------------------------===// @@ -37,14 +36,15 @@ #include "llvm/DerivedTypes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden, namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid - LICM() : LoopPass(&ID) {} + LICM() : LoopPass(ID) {} virtual bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -75,39 +75,31 @@ namespace { /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); AU.addRequired<DominatorTree>(); - AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg) + AU.addRequired<LoopInfo>(); + AU.addRequiredID(LoopSimplifyID); AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); - AU.addPreserved<DominanceFrontier>(); AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { - // Free the values stored in the map - for (std::map<Loop *, AliasSetTracker *>::iterator - I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I) - delete I->second; - - LoopToAliasMap.clear(); + assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets"); return false; } private: - // Various analyses that we use... 
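// Both JumpThreading (above) and the rewritten LICM (below) now rename
// cross-block values with SSAUpdater rather than demoting them through stack
// slots. The minimal usage pattern, sketched with assumed values V1/V2
// defined in blocks BB1/BB2 and used in UseBB:

SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSA(&NewPHIs);                      // collects any PHIs it creates
SSA.Initialize(V1->getType(), V1->getName());  // the new two-argument form
SSA.AddAvailableValue(BB1, V1);
SSA.AddAvailableValue(BB2, V2);
Value *Live = SSA.GetValueInMiddleOfBlock(UseBB); // inserts PHIs on demand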
AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo - DominatorTree *DT; // Dominator Tree for the current Loop... - DominanceFrontier *DF; // Current Dominance Frontier + DominatorTree *DT; // Dominator Tree for the current Loop. - // State that is updated as we process loops + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... Loop *CurLoop; // The current loop we are working on... AliasSetTracker *CurAST; // AliasSet information for the current loop... - std::map<Loop *, AliasSetTracker *> LoopToAliasMap; + DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap; /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info. void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); @@ -204,25 +196,12 @@ namespace { bool isLoopInvariantInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - /// PromoteValuesInLoop - Look at the stores in the loop and promote as many - /// to scalars as we can. - /// - void PromoteValuesInLoop(); - - /// FindPromotableValuesInLoop - Check the current loop for stores to - /// definite pointers, which are not loaded and stored through may aliases. - /// If these are found, create an alloca for the value, add it to the - /// PromotedValues list, and keep track of the mapping from value to - /// alloca... - /// - void FindPromotableValuesInLoop( - std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues, - std::map<Value*, AllocaInst*> &Val2AlMap); + void PromoteAliasSet(AliasSet &AS); }; } char LICM::ID = 0; -static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion"); +INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false); Pass *llvm::createLICMPass() { return new LICM(); } @@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); - DF = &getAnalysis<DominanceFrontier>(); DT = &getAnalysis<DominatorTree>(); CurAST = new AliasSetTracker(*AA); - // Collect Alias info from subloops + // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); LoopItr != LoopItrE; ++LoopItr) { Loop *InnerL = *LoopItr; - AliasSetTracker *InnerAST = LoopToAliasMap[InnerL]; - assert (InnerAST && "Where is my AST?"); + AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL]; + assert(InnerAST && "Where is my AST?"); // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); + + // Once we've incorporated the inner loop's AST into ours, we don't need the + // subloop's anymore. + delete InnerAST; + LoopToAliasSetMap.erase(InnerL); } CurLoop = L; @@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops... + if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops. CurAST->add(*BB); // Incorporate the specified basic block } @@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { HoistRegion(DT->getNode(L->getHeader())); // Now that all loop invariants have been removed from the loop, promote any - // memory references to scalars that we can... - if (!DisablePromotion && Preheader && L->hasDedicatedExits()) - PromoteValuesInLoop(); - + // memory references to scalars that we can. 
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + // Loop over all of the alias sets in the tracker object. + for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); + I != E; ++I) + PromoteAliasSet(*I); + } + // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; - LoopToAliasMap[L] = CurAST; + // If this loop is nested inside of another one, save the alias information + // for when we process the outer loop. + if (L->getParentLoop()) + LoopToAliasSetMap[L] = CurAST; + else + delete CurAST; return Changed; } @@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) { // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; - // We are processing blocks in reverse dfo, so process children first... + // We are processing blocks in reverse dfo, so process children first. const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) SinkRegion(Children[i]); @@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; + + // If the instruction is dead, we would try to sink it because it isn't used + // in the loop, instead, just delete it. + if (isInstructionTriviallyDead(&I)) { + DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + Changed = true; + continue; + } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if the all users of the instruction are @@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) { Instruction &I = *II++; + // Try constant folding this instruction. If all the operands are + // constants, it is technically hoistable, but it would be better to just + // fold it. + if (Constant *C = ConstantFoldInstruction(&I)) { + DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); + CurAST->copyValue(&I, C); + CurAST->deleteValue(&I); + I.replaceAllUsesWith(C); + I.eraseFromParent(); + continue; + } + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) { if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) && isSafeToExecuteUnconditionally(I)) hoist(I); - } + } const std::vector<DomTreeNode*> &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) @@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DEBUG(dbgs() << "LICM sinking instruction: " << I); + DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); + CurLoop->getUniqueExitBlocks(ExitBlocks); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) { // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. 
- if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. - I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); - ExitBlocks[0]->getInstList().insert(InsertPt, &I); + I.moveBefore(ExitBlocks[0]->getFirstNonPHI()); + + // This instruction is no longer in the AST for the current loop, because + // we just sunk it out of the loop. If we just sunk it into an outer + // loop, we will rediscover the operation when we process it. + CurAST->deleteValue(&I); } - } else if (ExitBlocks.empty()) { + return; + } + + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. - if (!I.getType()->isVoidTy()) + if (!I.use_empty()) I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); - } else { - // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to - // do all of the hard work of inserting PHI nodes as necessary. We convert - // the value into a stack object to get it to do this. - - // Firstly, we create a stack object to hold the value... - AllocaInst *AI = 0; - - if (!I.getType()->isVoidTy()) { - AI = new AllocaInst(I.getType(), 0, I.getName(), - I.getParent()->getParent()->getEntryBlock().begin()); - CurAST->add(AI); - } - - // Secondly, insert load instructions for each use of the instruction - // outside of the loop. - while (!I.use_empty()) { - Instruction *U = cast<Instruction>(I.use_back()); - - // If the user is a PHI Node, we actually have to insert load instructions - // in all predecessor blocks, not in the PHI block itself! - if (PHINode *UPN = dyn_cast<PHINode>(U)) { - // Only insert into each predecessor once, so that we don't have - // different incoming values from the same block! - std::map<BasicBlock*, Value*> InsertedBlocks; - for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i) - if (UPN->getIncomingValue(i) == &I) { - BasicBlock *Pred = UPN->getIncomingBlock(i); - Value *&PredVal = InsertedBlocks[Pred]; - if (!PredVal) { - // Insert a new load instruction right before the terminator in - // the predecessor block. - PredVal = new LoadInst(AI, "", Pred->getTerminator()); - CurAST->add(cast<LoadInst>(PredVal)); - } - - UPN->setIncomingValue(i, PredVal); - } - - } else { - LoadInst *L = new LoadInst(AI, "", U); - U->replaceUsesOfWith(&I, L); - CurAST->add(L); - } - } - - // Thirdly, insert a copy of the instruction in each exit block of the loop - // that is dominated by the instruction, storing the result into the memory - // location. Be careful not to insert the instruction into any particular - // basic block more than once. - std::set<BasicBlock*> InsertedBlocks; - BasicBlock *InstOrigBB = I.getParent(); - - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *ExitBlock = ExitBlocks[i]; - - if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) { - // If we haven't already processed this exit block, do so now. - if (InsertedBlocks.insert(ExitBlock).second) { - // Insert the code after the last PHI node... 
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - - // If this is the first exit block processed, just move the original - // instruction, otherwise clone the original instruction and insert - // the copy. - Instruction *New; - if (InsertedBlocks.size() == 1) { - I.removeFromParent(); - ExitBlock->getInstList().insert(InsertPt, &I); - New = &I; - } else { - New = I.clone(); - CurAST->copyValue(&I, New); - if (!I.getName().empty()) - New->setName(I.getName()+".le"); - ExitBlock->getInstList().insert(InsertPt, New); - } - - // Now that we have inserted the instruction, store it into the alloca - if (AI) new StoreInst(New, AI, InsertPt); - } - } - } - - // If the instruction doesn't dominate any exit blocks, it must be dead. - if (InsertedBlocks.empty()) { - CurAST->deleteValue(&I); - I.eraseFromParent(); - } - - // Finally, promote the fine value to SSA form. - if (AI) { - std::vector<AllocaInst*> Allocas; - Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); + return; + } + + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the + // hard work of inserting PHI nodes as necessary. + SmallVector<PHINode*, 8> NewPHIs; + SSAUpdater SSA(&NewPHIs); + + if (!I.use_empty()) + SSA.Initialize(I.getType(), I.getName()); + + // Insert a copy of the instruction in each exit block of the loop that is + // dominated by the instruction. Each exit block is known to only be in the + // ExitBlocks list once. + BasicBlock *InstOrigBB = I.getParent(); + unsigned NumInserted = 0; + + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + + if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) + continue; + + // Insert the code after the last PHI node. + BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); + + // If this is the first exit block processed, just move the original + // instruction, otherwise clone the original instruction and insert + // the copy. + Instruction *New; + if (NumInserted++ == 0) { + I.moveBefore(InsertPt); + New = &I; + } else { + New = I.clone(); + if (!I.getName().empty()) + New->setName(I.getName()+".le"); + ExitBlock->getInstList().insert(InsertPt, New); } + + // Now that we have inserted the instruction, inform SSAUpdater. + if (!I.use_empty()) + SSA.AddAvailableValue(ExitBlock, New); } + + // If the instruction doesn't dominate any exit blocks, it must be dead. + if (NumInserted == 0) { + CurAST->deleteValue(&I); + if (!I.use_empty()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); + I.eraseFromParent(); + return; + } + + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { + // Grab the use before incrementing the iterator. + Use &U = UI.getUse(); + // Increment the iterator before removing the use from the list. + ++UI; + SSA.RewriteUseAfterInsertions(U); + } + + // Update CurAST for NewPHIs if I had pointer type. + if (I.getType()->isPointerTy()) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(&I, NewPHIs[i]); + + // Finally, remove the instruction from CurAST. It is no longer in the loop. + CurAST->deleteValue(&I); } /// hoist - When an instruction is found to only use loop invariant operands @@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) { DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); - // Remove the instruction from its current basic block... but don't delete the - // instruction. 
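// The removed two-step idiom visible here (removeFromParent followed by
// getInstList().insert) is exactly what Instruction::moveBefore bundles:
//
//   I.removeFromParent();
//   Preheader->getInstList().insert(Preheader->getTerminator(), &I);
//
// collapses to
//
//   I.moveBefore(Preheader->getTerminator());
//
// which is the form the rewritten hoist() and sink() use.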
- I.removeFromParent(); - - // Insert the new node in Preheader, before the terminator. - Preheader->getInstList().insert(Preheader->getTerminator(), &I); + // Move the new node to the Preheader, before its terminator. + I.moveBefore(Preheader->getTerminator()); if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; @@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { return true; } - -/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking +/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers -/// which are loop invariant. We promote these memory locations to use allocas -/// instead. These allocas can easily be raised to register values by the -/// PromoteMem2Reg functionality. +/// which are loop invariant. /// -void LICM::PromoteValuesInLoop() { - // PromotedValues - List of values that are promoted out of the loop. Each - // value has an alloca instruction for it, and a canonical version of the - // pointer. - std::vector<std::pair<AllocaInst*, Value*> > PromotedValues; - std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca - - FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap); - if (ValueToAllocaMap.empty()) return; // If there are values to promote. - - Changed = true; - NumPromoted += PromotedValues.size(); - - std::vector<Value*> PointerValueNumbers; - - // Emit a copy from the value into the alloca'd value in the loop preheader - TerminatorInst *LoopPredInst = Preheader->getTerminator(); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - Value *Ptr = PromotedValues[i].second; - - // If we are promoting a pointer value, update alias information for the - // inserted load. - Value *LoadValue = 0; - if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { - // Locate a load or store through the pointer, and assign the same value - // to LI as we are loading or storing. Since we know that the value is - // stored in this loop, this will always succeed. - for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - LoadValue = LI; - break; - } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(1) == Ptr) { - LoadValue = SI->getOperand(0); - break; - } - } - } - assert(LoadValue && "No store through the pointer found!"); - PointerValueNumbers.push_back(LoadValue); // Remember this for later. - } - - // Load from the memory we are promoting. - LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst); - - if (LoadValue) CurAST->copyValue(LoadValue, LI); - - // Store into the temporary alloca. - new StoreInst(LI, PromotedValues[i].first, LoopPredInst); - } +void LICM::PromoteAliasSet(AliasSet &AS) { + // We can promote this alias set if it has a store, if it is a "Must" alias + // set, if the pointer is loop invariant, and if we are not eliminating any + // volatile loads or stores. 
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || + AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) + return; + + assert(!AS.empty() && + "Must alias set should have at least one pointer element in it!"); + Value *SomePtr = AS.begin()->getValue(); - // Scan the basic blocks in the loop, replacing uses of our pointers with - // uses of the allocas in question. + // It isn't safe to promote a load/store from the loop if the load/store is + // conditional. For example, turning: // - for (Loop::block_iterator I = CurLoop->block_begin(), - E = CurLoop->block_end(); I != E; ++I) { - BasicBlock *BB = *I; - // Rewrite all loads and stores in the block of the pointer... - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (LoadInst *L = dyn_cast<LoadInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(L->getOperand(0)); - if (I != ValueToAllocaMap.end()) - L->setOperand(0, I->second); // Rewrite load instruction... - } else if (StoreInst *S = dyn_cast<StoreInst>(II)) { - std::map<Value*, AllocaInst*>::iterator - I = ValueToAllocaMap.find(S->getOperand(1)); - if (I != ValueToAllocaMap.end()) - S->setOperand(1, I->second); // Rewrite store instruction... - } - } - } - - // Now that the body of the loop uses the allocas instead of the original - // memory locations, insert code to copy the alloca value back into the - // original memory location on all exits from the loop. Note that we only - // want to insert one copy of the code in each exit block, though the loop may - // exit to the same block more than once. + // for () { if (c) *P += 1; } // - SmallPtrSet<BasicBlock*, 16> ProcessedBlocks; - - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - if (!ProcessedBlocks.insert(ExitBlocks[i])) - continue; - - // Copy all of the allocas into their memory locations. - BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI(); - Instruction *InsertPos = BI; - unsigned PVN = 0; - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) { - // Load from the alloca. - LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); - - // If this is a pointer type, update alias info appropriately. - if (LI->getType()->isPointerTy()) - CurAST->copyValue(PointerValueNumbers[PVN++], LI); - - // Store into the memory we promoted. - new StoreInst(LI, PromotedValues[i].second, InsertPos); - } - } - - // Now that we have done the deed, use the mem2reg functionality to promote - // all of the new allocas we just created into real SSA registers. + // into: // - std::vector<AllocaInst*> PromotedAllocas; - PromotedAllocas.reserve(PromotedValues.size()); - for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) - PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); -} - -/// FindPromotableValuesInLoop - Check the current loop for stores to definite -/// pointers, which are not loaded and stored through may aliases and are safe -/// for promotion. If these are found, create an alloca for the value, add it -/// to the PromotedValues list, and keep track of the mapping from value to -/// alloca. 
-void LICM::FindPromotableValuesInLoop(
-    std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
-    std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
-  Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
-
-  // Loop over all of the alias sets in the tracker object.
-  for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
-       I != E; ++I) {
-    AliasSet &AS = *I;
-    // We can promote this alias set if it has a store, if it is a "Must" alias
-    // set, if the pointer is loop invariant, and if we are not eliminating any
-    // volatile loads or stores.
-    if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
-        AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
-      continue;
+  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
+  //
+  // is not safe, because *P may only be valid to access if 'c' is true.
+  //
+  // It is safe to promote P if all uses are direct load/stores and if at
+  // least one is guaranteed to be executed.
+  bool GuaranteedToExecute = false;
+
+  SmallVector<Instruction*, 64> LoopUses;
+  SmallPtrSet<Value*, 4> PointerMustAliases;
+
+  // Check that all of the pointers in the alias set have the same type.  We
+  // cannot (yet) promote a memory location that is loaded and stored in
+  // different sizes.
+  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+    Value *ASIV = ASI->getValue();
+    PointerMustAliases.insert(ASIV);
 
-    assert(!AS.empty() &&
-           "Must alias set should have at least one pointer element in it!");
-    Value *V = AS.begin()->getValue();
-
     // Check that all of the pointers in the alias set have the same type.  We
     // cannot (yet) promote a memory location that is loaded and stored in
     // different sizes.
-    {
-      bool PointerOk = true;
-      for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
-        if (V->getType() != I->getValue()->getType()) {
-          PointerOk = false;
-          break;
-        }
-      if (!PointerOk)
-        continue;
-    }
-
-    // It isn't safe to promote a load/store from the loop if the load/store is
-    // conditional.  For example, turning:
-    //
-    //    for () { if (c) *P += 1; }
-    //
-    // into:
-    //
-    //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
-    //
-    // is not safe, because *P may only be valid to access if 'c' is true.
-    //
-    // It is safe to promote P if all uses are direct load/stores and if at
-    // least one is guaranteed to be executed.
-    bool GuaranteedToExecute = false;
-    bool InvalidInst = false;
-    for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+    if (SomePtr->getType() != ASIV->getType())
+      return;
+
+    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
          UI != UE; ++UI) {
-      // Ignore instructions not in this loop.
+      // Ignore instructions that are outside the loop.
       Instruction *Use = dyn_cast<Instruction>(*UI);
       if (!Use || !CurLoop->contains(Use))
         continue;
-
-      if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
-        InvalidInst = true;
-        break;
-      }
+
+      // If there is a non-load/store instruction in the loop, we can't promote
+      // it.
+      if (isa<LoadInst>(Use))
+        assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+      else if (isa<StoreInst>(Use)) {
+        assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+        if (Use->getOperand(0) == ASIV) return;
+      } else
+        return; // Not a load or store.
 
       if (!GuaranteedToExecute)
         GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+      LoopUses.push_back(Use);
     }
+  }
+
+  // If there isn't a guaranteed-to-execute instruction, we can't promote.
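// The hazard described above, spelled out in plain C++ (an illustration only,
// mirroring the `for () { if (c) *P += 1; }` example in the comment earlier
// in this function):
static void loopBeforePromotion(int *P, int N, bool C) {
  for (int I = 0; I != N; ++I)
    if (C) *P += 1;   // *P is accessed only when C is true
}
static void unsafePromotion(int *P, int N, bool C) {
  int Tmp = *P;       // unconditional load: may trap when P is not valid
  for (int I = 0; I != N; ++I)
    if (C) Tmp += 1;
  *P = Tmp;           // unconditional store: same problem
}
// Once some load or store of P is guaranteed to execute, the unconditional
// preheader load and exit-block store cannot introduce a trap the original
// loop did not already have.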
+  if (!GuaranteedToExecute)
+    return;
+
+  // Otherwise, this is safe to promote, let's do it!
+  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+  Changed = true;
+  ++NumPromoted;
 
-  // If there is an non-load/store instruction in the loop, we can't promote
-  // it.  If there isn't a guaranteed-to-execute instruction, we can't
-  // promote.
-  if (InvalidInst || !GuaranteedToExecute)
+  // We use the SSAUpdater interface to insert phi nodes as required.
+  SmallVector<PHINode*, 16> NewPHIs;
+  SSAUpdater SSA(&NewPHIs);
+
+  // It wants to know some value of the same type as what we'll be inserting.
+  Value *SomeValue;
+  if (isa<LoadInst>(LoopUses[0]))
+    SomeValue = LoopUses[0];
+  else
+    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+  SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+  // First step: bucket up uses of the pointers by the block they occur in.
+  // This is important because we have to handle multiple defs/uses in a block
+  // ourselves: SSAUpdater is purely for cross-block references.
+  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+    Instruction *User = LoopUses[i];
+    UsesByBlock[User->getParent()].push_back(User);
+  }
+
+  // Okay, now we can iterate over all the blocks in the loop with uses,
+  // processing them.  Keep track of which loads are loading a live-in value.
+  SmallVector<LoadInst*, 32> LiveInLoads;
+  DenseMap<Value*, Value*> ReplacedLoads;
+
+  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+    Instruction *User = LoopUses[LoopUse];
+    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+
+    // If this block has already been processed, ignore this repeat use.
+    if (BlockUses.empty()) continue;
+
+    // Okay, this is the first use in the block.  If this block just has a
+    // single user in it, we can rewrite it trivially.
+    if (BlockUses.size() == 1) {
+      // If it is a store, it is a trivial def of the value in the block.
+      if (isa<StoreInst>(User)) {
+        SSA.AddAvailableValue(User->getParent(),
+                              cast<StoreInst>(User)->getOperand(0));
+      } else {
+        // Otherwise it is a load, queue it to rewrite as a live-in load.
+        LiveInLoads.push_back(cast<LoadInst>(User));
+      }
+      BlockUses.clear(); continue;
+    }
 
-    const Type *Ty = cast<PointerType>(V->getType())->getElementType();
-    AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
-    PromotedValues.push_back(std::make_pair(AI, V));
+    // Otherwise, check to see if this block is all loads.  If so, we can queue
+    // them all as live in loads.
+    bool HasStore = false;
+    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+      if (isa<StoreInst>(BlockUses[i])) {
+        HasStore = true;
+        break;
+      }
+    }
+
+    if (!HasStore) {
+      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+      BlockUses.clear();
+      continue;
+    }
 
-    // Update the AST and alias analysis.
-    CurAST->copyValue(V, AI);
+    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+    // Since SSAUpdater is purely for cross-block values, we need to determine
+    // the order of these instructions in the block.  If the first use in the
+    // block is a load, then it uses the live in value.  The last store defines
+    // the live out value.  We handle this by doing a linear scan of the block.
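// The scan below implements a simple block-local rule; a toy model
// (hypothetical types, not LLVM's) of the same logic: walking the block top
// to bottom, a load seen before any store reads the live-in value, a load
// after a store reads the most recent stored value, and the last store
// defines the block's live-out.
struct MemAccess { bool IsStore; long Val; }; // hypothetical stand-in
static long blockLiveOut(const MemAccess *Accs, int N, long LiveIn) {
  long Cur = LiveIn;        // what a load would observe at this point
  for (int I = 0; I != N; ++I)
    if (Accs[I].IsStore)
      Cur = Accs[I].Val;    // a store forwards its operand to later loads
  return Cur;               // the value live out of the block
}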
+ BasicBlock *BB = User->getParent(); + Value *StoredValue = 0; + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + if (LoadInst *L = dyn_cast<LoadInst>(II)) { + // If this is a load from an unrelated pointer, ignore it. + if (!PointerMustAliases.count(L->getOperand(0))) continue; + + // If we haven't seen a store yet, this is a live in use, otherwise + // use the stored value. + if (StoredValue) { + L->replaceAllUsesWith(StoredValue); + ReplacedLoads[L] = StoredValue; + } else { + LiveInLoads.push_back(L); + } + continue; + } + + if (StoreInst *S = dyn_cast<StoreInst>(II)) { + // If this is a store to an unrelated pointer, ignore it. + if (!PointerMustAliases.count(S->getOperand(1))) continue; - for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) - ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); + // Remember that this is the active value in the block. + StoredValue = S->getOperand(0); + } + } + + // The last stored value that happened is the live-out for the block. + assert(StoredValue && "Already checked that there is a store in block"); + SSA.AddAvailableValue(BB, StoredValue); + BlockUses.clear(); + } + + // Now that all the intra-loop values are classified, set up the preheader. + // It gets a load of the pointer we're promoting, and it is the live-out value + // from the preheader. + LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted", + Preheader->getTerminator()); + SSA.AddAvailableValue(Preheader, PreheaderLoad); + + // Now that the preheader is good to go, set up the exit blocks. Each exit + // block gets a store of the live-out values that feed them. Since we've + // already told the SSA updater about the defs in the loop and the preheader + // definition, it is all set and we can start using it. + SmallVector<BasicBlock*, 8> ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + BasicBlock *ExitBlock = ExitBlocks[i]; + Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); + Instruction *InsertPos = ExitBlock->getFirstNonPHI(); + new StoreInst(LiveInValue, SomePtr, InsertPos); + } - DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n"); + // Okay, now we rewrite all loads that use live-in values in the loop, + // inserting PHI nodes as necessary. + for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) { + LoadInst *ALoad = LiveInLoads[i]; + Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + ALoad->replaceAllUsesWith(NewVal); + CurAST->copyValue(ALoad, NewVal); + ReplacedLoads[ALoad] = NewVal; + } + + // If the preheader load is itself a pointer, we need to tell alias analysis + // about the new pointer we created in the preheader block and about any PHI + // nodes that just got inserted. + if (PreheaderLoad->getType()->isPointerTy()) { + // Copy any value stored to or loaded from a must-alias of the pointer. + CurAST->copyValue(SomeValue, PreheaderLoad); + + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + CurAST->copyValue(SomeValue, NewPHIs[i]); } + + // Now that everything is rewritten, delete the old instructions from the body + // of the loop. They should all be dead now. + for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) { + Instruction *User = LoopUses[i]; + + // If this is a load that still has uses, then the load must have been added + // as a live value in the SSAUpdate data structure for a block (e.g. because + // the loaded value was stored later). 
In this case, we need to recursively
+    // propagate the updates until we get to the real value.
+    if (!User->use_empty()) {
+      Value *NewVal = ReplacedLoads[User];
+      assert(NewVal && "not a replaced load?");
+
+      // Propagate down to the ultimate replacee.  The intermediate loads
+      // could theoretically already have been deleted, so we don't want to
+      // dereference the Value*'s.
+      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+      while (RLI != ReplacedLoads.end()) {
+        NewVal = RLI->second;
+        RLI = ReplacedLoads.find(NewVal);
+      }
+
+      User->replaceAllUsesWith(NewVal);
+      CurAST->copyValue(User, NewVal);
+    }
+
+    CurAST->deleteValue(User);
+    User->eraseFromParent();
+  }
+
+  // whew, we're done!
 }
+
 /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
 void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
-  AliasSetTracker *AST = LoopToAliasMap[L];
+  AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
   if (!AST)
     return;
@@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
 /// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
 /// set.
 void LICM::deleteAnalysisValue(Value *V, Loop *L) {
-  AliasSetTracker *AST = LoopToAliasMap[L];
+  AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
   if (!AST)
     return;
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index e4894e99b68f1..543dfc1cba096 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -28,7 +28,7 @@ namespace {
   class LoopDeletion : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopDeletion() : LoopPass(&ID) {}
+    LoopDeletion() : LoopPass(ID) {}
 
     // Possibly eliminate loop L if it is dead.
     bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -38,9 +38,9 @@ namespace {
                    bool &Changed, BasicBlock *Preheader);
 
     virtual void getAnalysisUsage(AnalysisUsage& AU) const {
-      AU.addRequired<ScalarEvolution>();
       AU.addRequired<DominatorTree>();
       AU.addRequired<LoopInfo>();
+      AU.addRequired<ScalarEvolution>();
       AU.addRequiredID(LoopSimplifyID);
       AU.addRequiredID(LCSSAID);
@@ -55,7 +55,8 @@ namespace {
 }
 
 char LoopDeletion::ID = 0;
-static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+INITIALIZE_PASS(LoopDeletion, "loop-deletion",
+                "Delete dead loops", false, false);
 
 Pass* llvm::createLoopDeletionPass() {
   return new LoopDeletion();
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 31058e5759a4a..a4336743a8f01 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -74,7 +74,7 @@ namespace {
   class LoopIndexSplit : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
-    LoopIndexSplit() : LoopPass(&ID) {}
+    LoopIndexSplit() : LoopPass(ID) {}
 
     // Index split Loop L. Return true if loop is split.
bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -197,8 +197,8 @@ namespace { } char LoopIndexSplit::ID = 0; -static RegisterPass<LoopIndexSplit> -X("loop-index-split", "Index Split Loops"); +INITIALIZE_PASS(LoopIndexSplit, "loop-index-split", + "Index Split Loops", false, false); Pass *llvm::createLoopIndexSplitPass() { return new LoopIndexSplit(); @@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB); PI != PE; ++PI) { BasicBlock *P = *PI; - if (P == DeadBB || DT->dominates(DeadBB, P)) + if (DT->dominates(DeadBB, P)) PredBlocks.push_back(P); } @@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB, // the dominance frontiers. for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end(); I != E; ++I) { - if (*I == CondBB || !DT->dominates(CondBB, *I)) continue; + if (!DT->properlyDominates(CondBB, *I)) continue; DominanceFrontier::iterator BBDF = DF->find(*I); DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin(); DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end(); @@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) { bool usedOutsideBB = false; for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) { - Instruction *U = cast<Instruction>(UI); + Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB) usedOutsideBB = true; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 16c4a15d3550a..65acc1d9257ad 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -35,7 +35,7 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(&ID) {} + LoopRotate() : LoopPass(ID) {} // Rotate Loop L as many times as possible. Return true if // loop is rotated at least once. @@ -43,15 +43,15 @@ namespace { // LCSSA form makes instruction renaming easier. virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + AU.addPreserved<DominanceFrontier>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); - AU.addRequired<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } // Helper functions @@ -79,7 +79,7 @@ namespace { } char LoopRotate::ID = 0; -static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops"); +INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false); Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } @@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // The value now exits in two versions: the initial value in the preheader // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal); + SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal); @@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // NewHeader is now the header of the loop. L->moveToHeader(NewHeader); + // Move the original header to the bottom of the loop, where it now more + // naturally belongs. 
This isn't necessary for correctness, and CodeGen can + // usually reorder blocks on its own to fix things like this up, but it's + // still nice to keep the IR readable. + // + // The original header should have only one predecessor at this point, since + // we checked that the loop had a proper preheader and unique backedge before + // we started. + assert(OrigHeader->getSinglePredecessor() && + "Original loop header has too many predecessors after loop rotation!"); + OrigHeader->moveAfter(OrigHeader->getSinglePredecessor()); + + // Also, since this original header only has one predecessor, zap its + // PHI nodes, which are now trivial. + FoldSingleEntryPHINodes(OrigHeader); + + // TODO: We could just go ahead and merge OrigHeader into its predecessor + // at this point, if we don't mind updating dominator info. + + // Establish a new preheader, update dominators, etc. preserveCanonicalLoopForm(LPM); ++NumRotated; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 1f9b4156b9cd6..e8dc5d3a640e6 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) { bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUsesMap.count(Reg)) return false; - const SmallBitVector &UsedByIndices = - RegUsesMap.find(Reg)->second.UsedByIndices; + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + if (I == RegUsesMap.end()) + return false; + const SmallBitVector &UsedByIndices = I->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. 
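// A worked instance of the rule in the comment above (plain arithmetic, not
// SCEV API): dividing the add recurrence {8,+,4} exactly by 2 yields {4,+,2},
// because (8 + 4*i) / 2 == 4 + 2*i on every iteration i. Note that the hunk
// below reorders the two checks so that a step which does not divide exactly
// bails out before the start is even computed.
static long addRecValue(long Start, long Step, long I) {
  return Start + Step * I; // value of {Start,+,Step} on iteration I
}
// addRecValue(8, 4, I) / 2 == addRecValue(4, 2, I) for all I >= 0.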
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { - const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, - IgnoreSignificantBits); - if (!Start) return 0; const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); if (!Step) return 0; + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; return SE.getAddRecExpr(Start, Step, AR->getLoop()); } return 0; @@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddExpr(NewOps); + if (Result != 0) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result != 0) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); - S = SE.getAddExpr(NewOps); + if (Result) + S = SE.getAddExpr(NewOps); return Result; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + if (Result) + S = SE.getAddRecExpr(NewOps, AR->getLoop()); return Result; } return 0; @@ -965,6 +970,12 @@ public: /// may be used. bool AllFixupsOutsideLoop; + /// WidestFixupType - This records the widest use type for any fixup using + /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different + /// max fixup widths to be equivalent, because the narrower one may be relying + /// on the implicit truncation to truncate away bogus bits. + const Type *WidestFixupType; + /// Formulae - A list of ways to build a value that can satisfy this user. /// After the list is populated, one of these is selected heuristically and /// used to formulate a replacement for OperandValToReplace in UserInst. 
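// Why WidestFixupType matters, as a plain C++ analogue (an illustration, not
// patch code): a narrow fixup may depend on implicit truncation to discard
// high bits, so two uses whose widest fixup widths differ cannot be treated
// as equivalent even when their formulae use the same registers.
#include <stdint.h>
static uint32_t narrowFixup(uint64_t Wide) {
  // 0x100000001 truncates to 1: the "bogus bits" vanish only because this
  // use is narrow, so a formula proven good for a wide use may expose them.
  return static_cast<uint32_t>(Wide);
}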
@@ -976,15 +987,14 @@ public:
   LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
                                       MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
-                                      AllFixupsOutsideLoop(true) {}
+                                      AllFixupsOutsideLoop(true),
+                                      WidestFixupType(0) {}
 
   bool HasFormulaWithSameRegs(const Formula &F) const;
   bool InsertFormula(const Formula &F);
   void DeleteFormula(Formula &F);
   void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
 
-  void check() const;
-
   void print(raw_ostream &OS) const;
   void dump() const;
 };
@@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const {
   for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
        E = Offsets.end(); I != E; ++I) {
     OS << *I;
-    if (next(I) != E)
+    if (llvm::next(I) != E)
       OS << ',';
   }
   OS << '}';
 
   if (AllFixupsOutsideLoop)
     OS << ", all-fixups-outside-loop";
+
+  if (WidestFixupType)
+    OS << ", widest fixup type: " << *WidestFixupType;
 }
 
 void LSRUse::dump() const {
@@ -1354,6 +1367,10 @@ public:
   void FilterOutUndesirableDedicatedRegisters();
 
   size_t EstimateSearchSpaceComplexity() const;
+  void NarrowSearchSpaceByDetectingSupersets();
+  void NarrowSearchSpaceByCollapsingUnrolledCode();
+  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+  void NarrowSearchSpaceByPickingWinnerRegs();
   void NarrowSearchSpaceUsingHeuristics();
 
   void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
 
   // Add one to the backedge-taken count to get the trip count.
-  const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
+  const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
   if (IterationCount != SE.getSCEV(Sel)) return Cond;
 
   // Check for a max calculation that matches the pattern. There's no check
@@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) {
 LSRUse *
 LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
                                        const LSRUse &OrigLU) {
-  // Search all uses for the formula. This could be more clever. Ignore
-  // ICmpZero uses because they may contain formulae generated by
-  // GenerateICmpZeroScales, in which case adding fixup offsets may
-  // be invalid.
+  // Search all uses for the formula. This could be more clever.
   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
     LSRUse &LU = Uses[LUIdx];
+    // Check whether this use is close enough to OrigLU, to see whether it's
+    // worthwhile looking through its formulae.
+    // Ignore ICmpZero uses because they may contain formulae generated by
+    // GenerateICmpZeroScales, in which case adding fixup offsets may
+    // be invalid.
     if (&LU != &OrigLU &&
         LU.Kind != LSRUse::ICmpZero &&
         LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+        LU.WidestFixupType == OrigLU.WidestFixupType &&
         LU.HasFormulaWithSameRegs(OrigF)) {
+      // Scan through this use's formulae.
       for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
            E = LU.Formulae.end(); I != E; ++I) {
         const Formula &F = *I;
+        // Check to see if this formula has the same registers and symbols
+        // as OrigF.
         if (F.BaseRegs == OrigF.BaseRegs &&
             F.ScaledReg == OrigF.ScaledReg &&
            F.AM.BaseGV == OrigF.AM.BaseGV &&
-            F.AM.Scale == OrigF.AM.Scale &&
-            LU.Kind) {
+            F.AM.Scale == OrigF.AM.Scale) {
           if (F.AM.BaseOffs == 0)
             return &LU;
+          // This is the formula where all the registers and symbols matched;
+          // there aren't going to be any others. Since we declined it, we
+          // can skip the rest of the formulae and proceed to the next LSRUse.
break; } } } } + // Nothing looked good. return 0; } @@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { for (SmallSetVector<const SCEV *, 4>::const_iterator I = Strides.begin(), E = Strides.end(); I != E; ++I) for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = - next(I); NewStrideIter != E; ++NewStrideIter) { + llvm::next(I); NewStrideIter != E; ++NewStrideIter) { const SCEV *OldStride = *I; const SCEV *NewStride = *NewStrideIter; @@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + if (!LU.WidestFixupType || + SE.getTypeSizeInBits(LU.WidestFixupType) < + SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) + LU.WidestFixupType = LF.OperandValToReplace->getType(); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl<const SCEV *> &Ops, - SmallVectorImpl<const SCEV *> &UninterestingOps, const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); + CollectSubexprs(*I, C, Ops, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { // Split a non-zero base out of an addrec. @@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), AR->getLoop()), - C, Ops, UninterestingOps, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); + C, Ops, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { @@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast<SCEVConstant>(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, - Ops, UninterestingOps, L, SE); + Ops, L, SE); return; } } - // Otherwise use the value itself. Loop-variant "unknown" values are - // uninteresting; we won't be able to do anything meaningful with them. - if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L)) - UninterestingOps.push_back(S); - else - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself, optionally with a scale applied. + Ops.push_back(C ? 
SE.getMulExpr(C, S) : S);
 }
 
 /// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
     const SCEV *BaseReg = Base.BaseRegs[i];
 
-    SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
-    CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
-
-    // Add any uninteresting values as one register, as we won't be able to
-    // form any interesting reassociation opportunities with them. They'll
-    // just have to be added inside the loop no matter what we do.
-    if (!UninterestingAddOps.empty())
-      AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+    SmallVector<const SCEV *, 8> AddOps;
+    CollectSubexprs(BaseReg, 0, AddOps, L, SE);
 
     if (AddOps.size() == 1) continue;
 
     for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
          JE = AddOps.end(); J != JE; ++J) {
+
+      // Loop-variant "unknown" values are uninteresting; we won't be able to
+      // do anything meaningful with them.
+      if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+        continue;
+
       // Don't pull a constant into a register if the constant could be folded
       // into an immediate field.
       if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
@@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
       // Collect all operands except *J.
       SmallVector<const SCEV *, 8> InnerAddOps
-        ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+        (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
       InnerAddOps.append
-        (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+        (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
 
       // Don't leave just a constant behind in a register if the constant could
       // be folded into an immediate field.
@@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
     if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
                    LU.Kind, LU.AccessTy, TLI)) {
       // Add the offset to the base register.
-      const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+      const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
       // If it cancelled out, drop the base register, otherwise update it.
       if (NewG->isZero()) {
         std::swap(F.BaseRegs[i], F.BaseRegs.back());
@@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() {
   }
 
   GenerateCrossUseConstantOffsets();
+
+  DEBUG(dbgs() << "\n"
+                  "After generating reuse formulae:\n";
+        print_uses(dbgs()));
 }
 
 /// If there are multiple formulae with the same set of registers used
@@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
   return Power;
 }
 
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
-void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
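// A toy rendering of the superset rule stated above (hypothetical containers,
// not LSR's data structures): a formula whose register set strictly contains
// another formula's register set cannot reduce register pressure, so it is
// safe to drop from the search space.
#include <algorithm>
#include <set>
static bool isPrunableSuperset(const std::set<unsigned> &Regs,
                               const std::set<unsigned> &OtherRegs) {
  return Regs.size() > OtherRegs.size() &&
         std::includes(Regs.begin(), Regs.end(),
                       OtherRegs.begin(), OtherRegs.end());
}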
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} +/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers +/// for expressions like A, A+1, A+2, etc., allocate a single register for +/// them. +void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { if (Fixup.LUIdx == LUIdx) { Fixup.LUIdx = LUThatHas - &Uses.front(); Fixup.Offset += F.AM.BaseOffs; - DEBUG(errs() << "New fixup has offset " + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } if (Fixup.LUIdx == NumUses-1) @@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } +} + +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// we've done more filtering, as it may be able to find more formulae to +/// eliminate. +void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by re-filtering out " + "undesirable dedicated registers.\n"); + + FilterOutUndesirableDedicatedRegisters(); + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } +} +/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely +/// to be profitable, and then in any use which has any reference to that +/// register, delete all formulae which do not reference that register. +void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle. SmallPtrSet<const SCEV *, 4> Taken; @@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { } } +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + NarrowSearchSpaceByDetectingSupersets(); + NarrowSearchSpaceByCollapsingUnrolledCode(); + NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + NarrowSearchSpaceByPickingWinnerRegs(); +} + /// SolveRecurse - This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, Cost &SolutionCost, @@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // to formulate the values needed for the uses. 
GenerateAllReuseFormulae(); - DEBUG(dbgs() << "\n" - "After generating reuse formulae:\n"; - print_uses(dbgs())); - FilterOutUndesirableDedicatedRegisters(); NarrowSearchSpaceUsingHeuristics(); @@ -3724,15 +3792,15 @@ private: } char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce", + "Loop Strength Reduction", false, false); Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { return new LoopStrengthReduce(TLI); } LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(&ID), TLI(tli) {} + : LoopPass(ID), TLI(tli) {} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 4ad41ae4b59f7..d0edfa2200513 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -26,7 +27,7 @@ using namespace llvm; static cl::opt<unsigned> -UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden, +UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); static cl::opt<unsigned> @@ -42,7 +43,7 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(&ID) {} + LoopUnroll() : LoopPass(ID) {} /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much @@ -55,23 +56,24 @@ namespace { /// loop preheaders be inserted into the CFG... /// virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired<LoopInfo>(); AU.addPreservedID(LCSSAID); - AU.addPreserved<LoopInfo>(); + AU.addPreserved<ScalarEvolution>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. // For now, recreate dom info, if loop is unrolled. AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); } }; } char LoopUnroll::ID = 0; -static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops"); +INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false); Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } @@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; // FIXME: Reconstruct dom info, because it is not preserved properly. 
-  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
-  if (DT) {
+  if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
     DT->runOnFunction(*F);
-    DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
-    if (DF)
-      DF->runOnFunction(*F);
-  }
 
   return true;
 }
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0c900ffc40277..9afe428ba5691 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -77,7 +77,6 @@ namespace {
     bool redoLoop;
 
     Loop *currentLoop;
-    DominanceFrontier *DF;
     DominatorTree *DT;
     BasicBlock *loopHeader;
     BasicBlock *loopPreheader;
@@ -92,15 +91,15 @@ namespace {
   public:
     static char ID; // Pass ID, replacement for typeid
     explicit LoopUnswitch(bool Os = false) :
-      LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
-      currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
+      LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+      currentLoop(NULL), DT(NULL), loopHeader(NULL),
       loopPreheader(NULL) {}
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
     bool processCurrentLoop();
 
     /// This transformation requires natural loop information & requires that
-    /// loop preheaders be inserted into the CFG...
+    /// loop preheaders be inserted into the CFG.
    ///
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequiredID(LoopSimplifyID);
@@ -110,7 +109,6 @@ namespace {
       AU.addRequiredID(LCSSAID);
       AU.addPreservedID(LCSSAID);
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<DominanceFrontier>();
     }
 
   private:
@@ -160,7 +158,7 @@ namespace {
   };
 }
 char LoopUnswitch::ID = 0;
-static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
+INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
 
 Pass *llvm::createLoopUnswitchPass(bool Os) {
   return new LoopUnswitch(Os);
@@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
 bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
   LI = &getAnalysis<LoopInfo>();
   LPM = &LPM_Ref;
-  DF = getAnalysisIfAvailable<DominanceFrontier>();
   DT = getAnalysisIfAvailable<DominatorTree>();
   currentLoop = L;
   Function *F = currentLoop->getHeader()->getParent();
@@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
     // FIXME: Reconstruct dom info, because it is not preserved properly.
     if (DT)
       DT->runOnFunction(*F);
-    if (DF)
-      DF->runOnFunction(*F);
   }
   return Changed;
 }
@@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() {
   return Changed;
 }
 
-/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
-///   1. Exit the loop with no side effects.
-///   2. Branch to the latch block with no side-effects.
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
 ///
-/// If these conditions are true, we return true and set ExitBB to the block we
+/// If so, we return true and set ExitBB to the block we
 /// exit through.
 ///
 static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
                                          BasicBlock *&ExitBB,
                                          std::set<BasicBlock*> &Visited) {
   if (!Visited.insert(BB).second) {
-    // Already visited and Ok, end of recursion.
-    return true;
+    // Already visited. Without more analysis, this could indicate an infinite loop.
+    return false;
   } else if (!L->contains(BB)) {
     // Otherwise, this is a loop exit, this is fine so long as this is the
     // first exit.
@@ -324,7 +318,7 @@
 /// process.
If so, return the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; - Visited.insert(L->getHeader()); // Branches to header are ok. + Visited.insert(L->getHeader()); // Branches to header make infinite loops. BasicBlock *ExitBB = 0; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; @@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, if (!BI->isConditional() || BI->getCondition() != Cond) return false; - // Check to see if a successor of the branch is guaranteed to go to the - // latch block or exit through a one exit block without having any + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any // side-effects. If so, determine the value of Cond that causes it to do // this. if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp new file mode 100644 index 0000000000000..973ffe7e6a40f --- /dev/null +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -0,0 +1,161 @@ +//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers atomic intrinsics to non-atomic form for use in a known +// non-preemptible environment. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loweratomic" +#include "llvm/Transforms/Scalar.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +namespace { + +bool LowerAtomicIntrinsic(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + + Function *Callee = CI->getCalledFunction(); + if (!Callee) + return false; + + unsigned IID = Callee->getIntrinsicID(); + switch (IID) { + case Intrinsic::memory_barrier: + break; + + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: { + Value *Ptr = CI->getArgOperand(0); + Value *Delta = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Res = NULL; + switch (IID) { + default: assert(0 && "Unrecognized atomic modify operation"); + case Intrinsic::atomic_load_add: + Res = Builder.CreateAdd(Orig, Delta); + break; + case Intrinsic::atomic_load_sub: + Res = Builder.CreateSub(Orig, Delta); + break; + case Intrinsic::atomic_load_and: + Res = Builder.CreateAnd(Orig, Delta); + break; + case Intrinsic::atomic_load_nand: + Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta)); + break; + case Intrinsic::atomic_load_or: + Res = Builder.CreateOr(Orig, Delta); + break; + case Intrinsic::atomic_load_xor: + Res = Builder.CreateXor(Orig, Delta); + break; + case Intrinsic::atomic_load_max: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Delta, + Orig); + break; + case 
Intrinsic::atomic_load_min: + Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta), + Orig, + Delta); + break; + case Intrinsic::atomic_load_umax: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Delta, + Orig); + break; + case Intrinsic::atomic_load_umin: + Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta), + Orig, + Delta); + break; + } + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Val = CI->getArgOperand(1); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Builder.CreateStore(Val, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + case Intrinsic::atomic_cmp_swap: { + Value *Ptr = CI->getArgOperand(0); + Value *Cmp = CI->getArgOperand(1); + Value *Val = CI->getArgOperand(2); + + LoadInst *Orig = Builder.CreateLoad(Ptr); + Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); + Value *Res = Builder.CreateSelect(Equal, Val, Orig); + Builder.CreateStore(Res, Ptr); + + CI->replaceAllUsesWith(Orig); + break; + } + + default: + return false; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); + + return true; +} + +struct LowerAtomic : public BasicBlockPass { + static char ID; + LowerAtomic() : BasicBlockPass(ID) {} + bool runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { + Instruction *Inst = DI++; + if (CallInst *CI = dyn_cast<CallInst>(Inst)) + Changed |= LowerAtomicIntrinsic(CI); + } + return Changed; + } + +}; + +} + +char LowerAtomic::ID = 0; +INITIALIZE_PASS(LowerAtomic, "loweratomic", + "Lower atomic intrinsics to non-atomic form", + false, false); + +Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0e566c5bd9be2..24fae423d2f70 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -304,7 +304,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - MemCpyOpt() : FunctionPass(&ID) {} + MemCpyOpt() : FunctionPass(ID) {} private: // This transformation requires dominator postdominator info @@ -331,8 +331,7 @@ namespace { // createMemCpyOptPass - The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } -static RegisterPass<MemCpyOpt> X("memcpyopt", - "MemCpy Optimization"); +INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false); @@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // If the call is readnone, ignore it, otherwise bail out. We don't even // allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). - if (AA.getModRefBehavior(CallSite::get(BI)) == + if (AA.getModRefBehavior(CallSite(BI)) == AliasAnalysis::DoesNotAccessMemory) continue; @@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // because we'll need to do type comparisons based on the underlying type. Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); - CallSite CS = CallSite::get(C); + CallSite CS(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. 
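// For context, the transformation performCallSlotOptzn is after, sketched in
// plain C++ (illustrative only; fillBuffer is a hypothetical callee): when a
// call fills a temporary whose only purpose is to be memcpy'd into Dest, the
// call can often be redirected to write Dest directly. The constant-size
// requirement noted above is what lets the pass prove the copy covers the
// whole temporary.
#include <cstring>
void fillBuffer(char *Out); // hypothetical
static void beforeCallSlotOptzn(char *Dest) {
  char Tmp[64];
  fillBuffer(Tmp);
  std::memcpy(Dest, Tmp, 64); // constant length, as the check requires
}
static void afterCallSlotOptzn(char *Dest) {
  fillBuffer(Dest);           // the temporary and the copy disappear
}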
@@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { return true; } -/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which -/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be -/// a memcpy from X to Z (or potentially a memmove, depending on circumstances). -/// This allows later passes to remove the first memcpy altogether. +/// processMemCpy - perform simplification of memcpy's. If we have memcpy A +/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite +/// B to be a memcpy from X to Z (or potentially a memmove, depending on +/// circumstances). This allows later passes to remove the first memcpy +/// altogether. bool MemCpyOpt::processMemCpy(MemCpyInst *M) { MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); @@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { const Type *ArgTys[3] = { M->getRawDest()->getType(), M->getRawSource()->getType(), M->getLength()->getType() }; - M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); + M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, + ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 98452f5d82c47..b8afcc12d927d 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -77,7 +77,7 @@ namespace { bool MadeChange; public: static char ID; // Pass identification, replacement for typeid - Reassociate() : FunctionPass(&ID) {} + Reassociate() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -103,7 +103,8 @@ namespace { } char Reassociate::ID = 0; -static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions"); +INITIALIZE_PASS(Reassociate, "reassociate", + "Reassociate expressions", false, false); // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 13222ac22004e..506b72ac34e0d 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { struct RegToMem : public FunctionPass { static char ID; // Pass identification, replacement for typeid - RegToMem() : FunctionPass(&ID) {} + RegToMem() : FunctionPass(ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); @@ -59,8 +59,8 @@ namespace { } char RegToMem::ID = 0; -static RegisterPass<RegToMem> -X("reg2mem", "Demote all values to stack slots"); +INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots", + false, false); bool RegToMem::runOnFunction(Function &F) { @@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) { // createDemoteRegisterToMemory - Provide an entry point to create this pass. 
// -const PassInfo *const llvm::DemoteRegisterToMemoryID = &X; +char &llvm::DemoteRegisterToMemoryID = RegToMem::ID; FunctionPass *llvm::createDemoteRegisterToMemoryPass() { return new RegToMem(); } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 907ece8fcce97..6115c05c20ac4 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -275,12 +275,12 @@ public: return I->second; } - LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { + /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = StructValueState.find(std::make_pair(V, i)); assert(I != StructValueState.end() && "V is not in valuemap!"); return I->second; - } + }*/ /// getTrackedRetVals - Get the inferred return value map. /// @@ -508,17 +508,16 @@ private: void visitLoadInst (LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); void visitCallInst (CallInst &I) { - visitCallSite(CallSite::get(&I)); + visitCallSite(&I); } void visitInvokeInst (InvokeInst &II) { - visitCallSite(CallSite::get(&II)); + visitCallSite(&II); visitTerminatorInst(II); } void visitCallSite (CallSite CS); void visitUnwindInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVANextInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } void visitInstruction(Instruction &I) { @@ -1586,7 +1585,7 @@ namespace { /// struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid - SCCP() : FunctionPass(&ID) {} + SCCP() : FunctionPass(ID) {} // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. @@ -1600,8 +1599,8 @@ namespace { } // end anonymous namespace char SCCP::ID = 0; -static RegisterPass<SCCP> -X("sccp", "Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(SCCP, "sccp", + "Sparse Conditional Constant Propagation", false, false); // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { @@ -1702,14 +1701,15 @@ namespace { /// struct IPSCCP : public ModulePass { static char ID; - IPSCCP() : ModulePass(&ID) {} + IPSCCP() : ModulePass(ID) {} bool runOnModule(Module &M); }; } // end anonymous namespace char IPSCCP::ID = 0; -static RegisterPass<IPSCCP> -Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation"); +INITIALIZE_PASS(IPSCCP, "ipsccp", + "Interprocedural Sparse Conditional Constant Propagation", + false, false); // createIPSCCPPass - This is the public interface to this file. ModulePass *llvm::createIPSCCPPass() { @@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + // AddressTakenFunctions - This set keeps track of the address-taken functions + // that are in the input. As IPSCCP runs through and simplifies code, + // functions that were address taken can end up losing their + // address-taken-ness. Because of this, we keep track of their addresses from + // the first pass so we can use them for the later simplification pass. + SmallPtrSet<Function*, 32> AddressTakenFunctions; + // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. 
// @@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) { // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. - if (F->hasLocalLinkage() && !AddressIsTaken(F)) { - Solver.AddArgumentTrackedFunction(F); - continue; + if (F->hasLocalLinkage()) { + if (AddressIsTaken(F)) + AddressTakenFunctions.insert(F); + else { + Solver.AddArgumentTrackedFunction(F); + continue; + } } // Assume the function is called. @@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) { continue; // We can only do this if we know that nothing else can call the function. - if (!F->hasLocalLinkage() || AddressIsTaken(F)) + if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F)) continue; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index dd445f63320a4..fee317dbd9ab5 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -28,6 +28,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" @@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid - explicit SROA(signed T = -1) : FunctionPass(&ID) { + explicit SROA(signed T = -1) : FunctionPass(ID) { if (T == -1) SRThreshold = 128; else @@ -114,8 +115,7 @@ namespace { void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); void DeleteDeadInstructions(); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, @@ -135,7 +135,8 @@ namespace { } char SROA::ID = 0; -static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates"); +INITIALIZE_PASS(SROA, "scalarrepl", + "Scalar Replacement of Aggregates", false, false); // Public interface to the ScalarReplAggregates pass FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { @@ -193,6 +194,27 @@ private: }; } // end anonymous namespace. + +/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't +/// allowed to form. We do this to avoid MMX types, which is a complete hack, +/// but is required until the backend is fixed. +static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) { + StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple()); + if (!Triple.startswith("i386") && + !Triple.startswith("x86_64")) + return false; + + // Reject all the MMX vector types. + switch (VTy->getNumElements()) { + default: return false; + case 1: return VTy->getElementType()->isIntegerTy(64); + case 2: return VTy->getElementType()->isIntegerTy(32); + case 4: return VTy->getElementType()->isIntegerTy(16); + case 8: return VTy->getElementType()->isIntegerTy(8); + } +} + + /// TryConvert - Analyze the specified alloca, and if it is safe to do so, /// rewrite it to be a new alloca which is mem2reg'able. This returns the new /// alloca if possible or null if not. 
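The IsVerbotenVectorType predicate added above keys off the element count and element width that together make a 64-bit MMX vector. Here is a dependency-free sketch of the same classification, with a (element count, element bit width) pair standing in for the VectorType query; isMMXShape is a hypothetical name:

    #include <cassert>

    // True for exactly the 64-bit MMX shapes scalarrepl must now avoid
    // forming on x86: <1 x i64>, <2 x i32>, <4 x i16>, <8 x i8>.
    static bool isMMXShape(unsigned NumElts, unsigned EltBits) {
      switch (NumElts) {
      default: return false;
      case 1: return EltBits == 64;
      case 2: return EltBits == 32;
      case 4: return EltBits == 16;
      case 8: return EltBits == 8;
      }
    }

    int main() {
      assert(isMMXShape(2, 32));   // <2 x i32>: MMX-sized, rejected.
      assert(!isMMXShape(4, 32));  // <4 x i32>: SSE-sized, still allowed.
      assert(!isMMXShape(1, 32));  // <1 x i32>: not an MMX shape.
      return 0;
    }

Note that the check only fires when the module's target triple starts with i386 or x86_64; other targets keep forming these vector types.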
@@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. const Type *NewTy; - if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector && + !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. @@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == CallInst::ArgOffset, Info); + UI.getOperandNo() == 0, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { @@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) + return HasPadding(ATy->getElementType(), TD); + + if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) + return HasPadding(VTy->getElementType(), TD); + if (const StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; @@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { if (PrevFieldEnd < SL->getSizeInBits()) return true; } - - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { - return HasPadding(ATy->getElementType(), TD); - } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - return HasPadding(VTy->getElementType(), TD); } + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } @@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != CallInst::ArgOffset) return false; + if (UI.getOperandNo() != 0) return false; // If the source of the memcpy/move is not a constant global, reject it. if (!PointsToConstantGlobal(MI->getSource())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 49d93a2fcc271..360749caf1116 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - CFGSimplifyPass() : FunctionPass(&ID) {} + CFGSimplifyPass() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); }; } char CFGSimplifyPass::ID = 0; -static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG"); +INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", + "Simplify the CFG", false, false); // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { while (LocalChange) { LocalChange = false; - // Loop over all of the basic blocks (except the first one) and remove them - // if they are unneeded... 
+ // Loop over all of the basic blocks and remove them if they are unneeded... // - for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) { + for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { if (SimplifyCFG(BBIt++, TD)) { LocalChange = true; ++NumSimpl; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index c3408e77807fb..3ec70ec2e024f 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -32,7 +32,7 @@ namespace { const TargetData *TD; public: static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {} + SimplifyHalfPowrLibCalls() : FunctionPass(ID) {} bool runOnFunction(Function &F); @@ -46,8 +46,8 @@ namespace { char SimplifyHalfPowrLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyHalfPowrLibCalls> -X("simplify-libcalls-halfpowr", "Simplify half_powr library calls"); +INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", + "Simplify half_powr library calls", false, false); // Public interface to the Simplify HalfPowr LibCalls pass. FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b1c619125c355..d7ce53f367153 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization { StrLen, B, TD); for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { - ICmpInst *Old = cast<ICmpInst>(UI++); + ICmpInst *Old = cast<ICmpInst>(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, ConstantInt::getNullValue(StrNCmp->getType()), "cmp"); @@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), - CI->getType()); + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), + ToFindStr[0], B, TD), CI->getType()); return 0; } }; @@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), - false); + Value *Val = B.CreateIntCast(CI->getArgOperand(1), + Type::getInt8Ty(*Context), false); EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); return CI->getArgOperand(0); } @@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()+1), 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the + ConstantInt::get(TD->getIntPtrType(*Context), // nul byte. 
+ FormatStr.size() + 1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), + IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1236,7 +1237,7 @@ namespace { bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {} + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {} void InitOptimizations(); bool runOnFunction(Function &F); @@ -1253,8 +1254,8 @@ namespace { char SimplifyLibCalls::ID = 0; } // end anonymous namespace. -static RegisterPass<SimplifyLibCalls> -X("simplify-libcalls", "Simplify well-known library calls"); +INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false); // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * pow(pow(x,y),z)-> pow(x,y*z) // // puts: -// * puts("") -> putchar("\n") +// * puts("") -> putchar('\n') // // round, roundf, roundl: // * round(cnst) -> cnst' diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index b88ba48505092..95d3dedfb62db 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -35,7 +35,7 @@ namespace { public: static char ID; // Pass identification - Sinking() : FunctionPass(&ID) {} + Sinking() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -56,8 +56,7 @@ namespace { } // end anonymous namespace char Sinking::ID = 0; -static RegisterPass<Sinking> -X("sink", "Code sinking"); +INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false); FunctionPass *llvm::createSinkingPass() { return new Sinking(); } diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 9208238f4ba5e..2e437ac778c8c 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -49,7 +49,7 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - TailDup() : FunctionPass(&ID) {} + TailDup() : FunctionPass(ID) {} private: inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned); @@ -59,7 +59,7 @@ namespace { } char TailDup::ID = 0; -static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication"); +INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false); // Public interface to the Tail Duplication pass FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 01c8e5d6fcf48..371725467a24e 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid - TailCallElim() : FunctionPass(&ID) 
{} + TailCallElim() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -87,7 +87,8 @@ namespace { } char TailCallElim::ID = 0; -static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination"); +INITIALIZE_PASS(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false); // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { @@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; - for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) - if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator())) - if (RI != IgnoreRI) { - Value *RetOp = RI->getOperand(0); - - // We can only perform this transformation if the value returned is - // evaluatable at the start of the initial invocation of the function, - // instead of at the end of the evaluation. - // - if (!isDynamicConstant(RetOp, CI, RI)) - return 0; - - if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. - ReturnedValue = RetOp; - } + for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { + ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()); + if (RI == 0 || RI == IgnoreRI) continue; + + // We can only perform this transformation if the value returned is + // evaluatable at the start of the initial invocation of the function, + // instead of at the end of the evaluation. + // + Value *RetOp = RI->getOperand(0); + if (!isDynamicConstant(RetOp, CI, RI)) + return 0; + + if (ReturnedValue && RetOp != ReturnedValue) + return 0; // Cannot transform if differing values are returned. + ReturnedValue = RetOp; + } return ReturnedValue; } @@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); - // Exactly one operand should be the result of the call instruction... + // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) return 0; @@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) - if (!CanMoveAboveCall(BBI, CI)) { - // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed - // using accumulator recursion elimination. Check to see if this is the - // case, and if so, remember the initial accumulator value for later. - if ((AccumulatorRecursionEliminationInitVal = - CanTransformAccumulatorRecursion(BBI, CI))) { - // Yes, this is accumulator recursion. Remember which instruction - // accumulates. - AccumulatorRecursionInstr = BBI; - } else { - return false; // Otherwise, we cannot eliminate the tail recursion! - } + for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) { + if (CanMoveAboveCall(BBI, CI)) continue; + + // If we can't move the instruction above the call, it might be because it + // is an associative and commutative operation that could be tranformed + // using accumulator recursion elimination. 
Check to see if this is the + // case, and if so, remember the initial accumulator value for later. + if ((AccumulatorRecursionEliminationInitVal = + CanTransformAccumulatorRecursion(BBI, CI))) { + // Yes, this is accumulator recursion. Remember which instruction + // accumulates. + AccumulatorRecursionInstr = BBI; + } else { + return false; // Otherwise, we cannot eliminate the tail recursion! } + } // We can only transform call/return pairs that either ignore the return value // of the call and return void, ignore the value of the call and return a diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index ec625b4cbb28f..093083a630cf3 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -97,23 +97,13 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) { /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { - pred_iterator PI(pred_begin(BB)), PE(pred_end(BB)); - // Can't merge the entry block. Don't merge away blocks who have their - // address taken: this is a bug if the predecessor block is the entry node - // (because we'd end up taking the address of the entry) and undesirable in - // any case. - if (pred_begin(BB) == pred_end(BB) || - BB->hasAddressTaken()) return false; + // Don't merge away blocks who have their address taken. + if (BB->hasAddressTaken()) return false; - BasicBlock *PredBB = *PI++; - for (; PI != PE; ++PI) // Search all predecessors, see if they are all same - if (*PI != PredBB) { - PredBB = 0; // There are multiple different predecessors... - break; - } - - // Can't merge if there are multiple predecessors. + // Can't merge if there are multiple predecessors, or no predecessors. + BasicBlock *PredBB = BB->getUniquePredecessor(); if (!PredBB) return false; + // Don't break self-loops. if (PredBB == BB) return false; // Don't break invokes. @@ -267,7 +257,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { case Instruction::Switch: // Should remove entry default: case Instruction::Ret: // Cannot happen, has no successors! - llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!"); + llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!"); } if (NewTI) // If it's a different instruction, replace. @@ -421,7 +411,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); - if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) + if (DominanceFrontier *DF = + P ? 
P->getAnalysisIfAvailable<DominanceFrontier>() : 0) DF->splitBlock(NewBB); // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index f0e31efa30c43..23a30cc585077 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -82,8 +82,8 @@ void BasicInlinerImpl::inlineFunctions() { Function *F = *FI; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && CS.getCalledFunction() + CallSite CS(cast<Value>(I)); + if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isDeclaration()) CallSites.push_back(CS); } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 26f53c05a042f..f75ffe6105fa6 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -36,7 +36,7 @@ STATISTIC(NumBroken, "Number of blocks inserted"); namespace { struct BreakCriticalEdges : public FunctionPass { static char ID; // Pass identification, replacement for typeid - BreakCriticalEdges() : FunctionPass(&ID) {} + BreakCriticalEdges() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); @@ -53,11 +53,11 @@ namespace { } char BreakCriticalEdges::ID = 0; -static RegisterPass<BreakCriticalEdges> -X("break-crit-edges", "Break critical edges in CFG"); +INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", + "Break critical edges in CFG", false, false); // Publically exposed interface to pass... -const PassInfo *const llvm::BreakCriticalEdgesID = &X; +char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); } @@ -225,7 +225,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); UI != E; ) { Value::use_iterator Use = UI++; - if (PHINode *PN = dyn_cast<PHINode>(Use)) { + if (PHINode *PN = dyn_cast<PHINode>(*Use)) { // Remove one entry from each PHI. 
if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) PN->setOperand(Use.getOperandNo(), NewBB); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 7a9d007ed5583..c3139498c2504 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -421,9 +421,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { - EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - 1, false, B, TD); + if (isFoldable(3, 2, false)) { + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); replaceCall(CI->getArgOperand(0)); return true; } @@ -444,9 +444,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { - EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - 1, false, B, TD); + if (isFoldable(3, 2, false)) { + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); replaceCall(CI->getArgOperand(0)); return true; } @@ -462,10 +462,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + if (isFoldable(3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), + false, B, TD); replaceCall(CI->getArgOperand(0)); return true; } @@ -487,7 +488,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. 
- if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) { + if (isFoldable(2, 1, true)) { Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, Name.substr(2, 6)); replaceCall(Ret); @@ -505,7 +506,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B, TD, Name.substr(2, 7)); replaceCall(Ret); diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index dec227acafd27..61cbeb2bd35b9 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -20,7 +20,6 @@ add_llvm_library(LLVMTransformUtils Mem2Reg.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp - SSI.cpp SimplifyCFG.cpp UnifyFunctionExitNodes.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1dcfd57878466..f43186edae435 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -23,7 +23,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" -#include "ValueMapper.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" @@ -69,10 +69,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, } // Clone OldFunc into NewFunc, transforming the old arguments into references to -// ArgMap values. +// VMap values. // void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -126,7 +127,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, BE = NewFunc->end(); BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, VMap); + RemapInstruction(II, VMap, ModuleLevelChanges); } /// CloneFunction - Return a copy of the specified function, but without @@ -139,6 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, /// Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, ClonedCodeInfo *CodeInfo) { std::vector<const Type*> ArgTypes; @@ -167,7 +169,7 @@ Function *llvm::CloneFunction(const Function *F, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. 
- CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo); + CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo); return NewF; } @@ -180,6 +182,7 @@ namespace { Function *NewFunc; const Function *OldFunc; ValueToValueMapTy &VMap; + bool ModuleLevelChanges; SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; @@ -187,12 +190,14 @@ namespace { public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, + bool moduleLevelChanges, SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) - : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns), - NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + : NewFunc(newFunc), OldFunc(oldFunc), + VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), + Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } /// CloneBlock - The specified block is found to be reachable, clone it and @@ -313,7 +318,7 @@ ConstantFoldMappedInstruction(const Instruction *I) { SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - VMap))) + VMap, ModuleLevelChanges))) Ops.push_back(Op); else return 0; // All operands not constant! @@ -334,25 +339,16 @@ ConstantFoldMappedInstruction(const Instruction *I) { Ops.size(), TD); } -static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { - DILocation ILoc(InsnMD); - if (!ILoc.Verify()) return InsnMD; +static DebugLoc +UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL, + LLVMContext &Ctx) { + DebugLoc NewLoc = TheCallDL; + if (MDNode *IA = InsnDL.getInlinedAt(Ctx)) + NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL, + Ctx); - DILocation CallLoc(TheCallMD); - if (!CallLoc.Verify()) return InsnMD; - - DILocation OrigLocation = ILoc.getOrigLocation(); - MDNode *NewLoc = TheCallMD; - if (OrigLocation.Verify()) - NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD); - - Value *MDVs[] = { - InsnMD->getOperand(0), // Line - InsnMD->getOperand(1), // Col - InsnMD->getOperand(2), // Scope - NewLoc - }; - return MDNode::get(InsnMD->getContext(), MDVs, 4); + return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(), + InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx)); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -364,6 +360,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, @@ -377,8 +374,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(VMap.count(II) && "No mapping from source argument specified!"); #endif - PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns, - NameSuffix, CodeInfo, TD); + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, + Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. 
std::vector<const BasicBlock*> CloneWorklist; @@ -408,10 +405,9 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // BasicBlock::iterator I = NewBB->begin(); - unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg"); - MDNode *TheCallMD = NULL; - if (TheCall && TheCall->hasMetadata()) - TheCallMD = TheCall->getMetadata(DbgKind); + DebugLoc TheCallDL; + if (TheCall) + TheCallDL = TheCall->getDebugLoc(); // Handle PHI nodes specially, as we have to remove references to dead // blocks. @@ -420,15 +416,17 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { if (I->hasMetadata()) { - if (TheCallMD) { - if (MDNode *IMD = I->getMetadata(DbgKind)) { - MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); - I->setMetadata(DbgKind, NewMD); + if (!TheCallDL.isUnknown()) { + DebugLoc IDL = I->getDebugLoc(); + if (!IDL.isUnknown()) { + DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL, + I->getContext()); + I->setDebugLoc(NewDL); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. - I->setMetadata(DbgKind, 0); + I->setDebugLoc(DebugLoc()); } } PHIToResolve.push_back(cast<PHINode>(OldI)); @@ -444,18 +442,20 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) { if (I->hasMetadata()) { - if (TheCallMD) { - if (MDNode *IMD = I->getMetadata(DbgKind)) { - MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); - I->setMetadata(DbgKind, NewMD); + if (!TheCallDL.isUnknown()) { + DebugLoc IDL = I->getDebugLoc(); + if (!IDL.isUnknown()) { + DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL, + I->getContext()); + I->setDebugLoc(NewDL); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. - I->setMetadata(DbgKind, 0); + I->setDebugLoc(DebugLoc()); } } - RemapInstruction(I, VMap); + RemapInstruction(I, VMap, ModuleLevelChanges); } } @@ -477,7 +477,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), - VMap); + VMap, ModuleLevelChanges); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index fc603d23e9ace..b347bf597f8ef 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -17,7 +17,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/TypeSymbolTable.h" #include "llvm/Constant.h" -#include "ValueMapper.h" +#include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; /// CloneModule - Return an exact copy of the specified module. 
This is not as @@ -89,7 +89,8 @@ Module *llvm::CloneModule(const Module *M, GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - VMap))); + VMap, + true))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -108,7 +109,7 @@ Module *llvm::CloneModule(const Module *M, } SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, VMap, Returns); + CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); } F->setLinkage(I->getLinkage()); @@ -120,34 +121,17 @@ Module *llvm::CloneModule(const Module *M, GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, VMap))); + GA->setAliasee(cast<Constant>(MapValue(C, VMap, true))); } // And named metadata.... for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), E = M->named_metadata_end(); I != E; ++I) { const NamedMDNode &NMD = *I; - SmallVector<MDNode*, 4> MDs; + NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap))); - NamedMDNode::Create(New->getContext(), NMD.getName(), - MDs.data(), MDs.size(), New); + NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true))); } - // Update metadata attach with instructions. - for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI) - for (Function::iterator FI = MI->begin(), FE = MI->end(); - FI != FE; ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs; - BI->getAllMetadata(MDs); - for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator - MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) { - Value *MappedValue = MapValue(MDI->second, VMap); - if (MDI->second != MappedValue && MappedValue) - BI->setMetadata(MDI->first, cast<MDNode>(MappedValue)); - } - } return New; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 598e7d29e3781..88979e862df26 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -215,12 +215,12 @@ static void UpdateCallGraphAfterInlining(CallSite CS, if (I->second->getFunction() == 0) if (Function *F = CallSite(NewCall).getCalledFunction()) { // Indirect call site resolved to direct call. - CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]); - + CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); + continue; } - - CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); + + CallerNode->addCalledFunction(CallSite(NewCall), I->second); } // Update the call graph by deleting the edge from Callee to Caller. We must @@ -365,7 +365,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. 
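The UpdateInlinedAtInfo rewrite in the CloneFunction.cpp hunks above replaces MDNode surgery with a recursion over DebugLoc: the instruction keeps its own line and column, and the location of the call being inlined is spliced onto the far end of its inlined-at chain (becoming the ultimate call site), so locations from earlier rounds of inlining stay intact. A dependency-free sketch of that recursion, using a hypothetical Loc type as a stand-in for DebugLoc and modern C++ for brevity:

    #include <cassert>
    #include <memory>

    struct Loc {                        // Hypothetical stand-in for DebugLoc.
      unsigned Line, Col;
      std::shared_ptr<Loc> InlinedAt;   // Null when not itself inlined.
    };

    // Keep InsnLoc's own line/col, but attach CallLoc at the end of its
    // inlined-at chain, mirroring the recursion through getInlinedAt().
    static Loc updateInlinedAt(const Loc &InsnLoc, const Loc &CallLoc) {
      Loc New = InsnLoc;
      if (InsnLoc.InlinedAt)
        New.InlinedAt = std::make_shared<Loc>(
            updateInlinedAt(*InsnLoc.InlinedAt, CallLoc));
      else
        New.InlinedAt = std::make_shared<Loc>(CallLoc);
      return New;
    }

    int main() {
      Loc Call = { 40, 3, nullptr };    // The call being inlined (caller).
      Loc Insn = { 7, 10, nullptr };    // An instruction in the callee.
      Loc New = updateInlinedAt(Insn, Call);
      assert(New.Line == 7 && New.InlinedAt->Line == 40);
      return 0;
    }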
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 090af95c4b87b..5ca82996b42f4 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -23,7 +23,7 @@ using namespace llvm; namespace { struct InstNamer : public FunctionPass { static char ID; // Pass identification, replacement for typeid - InstNamer() : FunctionPass(&ID) {} + InstNamer() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &Info) const { Info.setPreservesAll(); @@ -48,12 +48,12 @@ namespace { }; char InstNamer::ID = 0; - static RegisterPass<InstNamer> X("instnamer", - "Assign names to anonymous instructions"); + INITIALIZE_PASS(InstNamer, "instnamer", + "Assign names to anonymous instructions", false, false); } -const PassInfo *const llvm::InstructionNamerID = &X; +char &llvm::InstructionNamerID = InstNamer::ID; //===----------------------------------------------------------------------===// // // InstructionNamer - Give any unnamed non-void instructions "tmp" names. diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index e90c30bba78e2..275b26508f991 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -47,7 +47,7 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables"); namespace { struct LCSSA : public LoopPass { static char ID; // Pass identification, replacement for typeid - LCSSA() : LoopPass(&ID) {} + LCSSA() : LoopPass(ID) {} // Cached analysis information for the current function. DominatorTree *DT; @@ -64,22 +64,13 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - // LCSSA doesn't actually require LoopSimplify, but the PassManager - // doesn't know how to schedule LoopSimplify by itself. - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredTransitive<LoopInfo>(); - AU.addPreserved<LoopInfo>(); - AU.addRequiredTransitive<DominatorTree>(); - AU.addPreserved<ScalarEvolution>(); + AU.addRequired<DominatorTree>(); AU.addPreserved<DominatorTree>(); - - // Request DominanceFrontier now, even though LCSSA does - // not use it. This allows Pass Manager to schedule Dominance - // Frontier early enough such that one LPPassManager can handle - // multiple loop transformation passes. - AU.addRequired<DominanceFrontier>(); AU.addPreserved<DominanceFrontier>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<LoopInfo>(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<ScalarEvolution>(); } private: bool ProcessInstruction(Instruction *Inst, @@ -99,10 +90,10 @@ namespace { } char LCSSA::ID = 0; -static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass"); +INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false); Pass *llvm::createLCSSAPass() { return new LCSSA(); } -const PassInfo *const llvm::LCSSAID = &X; +char &llvm::LCSSAID = LCSSA::ID; /// BlockDominatesAnExit - Return true if the specified block dominates at least @@ -215,7 +206,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, DomTreeNode *DomNode = DT->getNode(DomBB); SSAUpdater SSAUpdate; - SSAUpdate.Initialize(Inst); + SSAUpdate.Initialize(Inst->getType(), Inst->getName()); // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. 
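The one-line change in LCSSA::ProcessInstruction above reflects an SSAUpdater interface switch spelled out in the SSAUpdater.cpp hunks later in this patch: Initialize no longer takes a prototype Value, only its type plus a base name for any PHIs it inserts. A hedged usage sketch against the post-patch API; materializeAt is a hypothetical helper, not code from this patch:

    #include "llvm/Transforms/Utils/SSAUpdater.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Produce a value equivalent to Inst that is usable in UseBB, letting
    // SSAUpdater insert PHIs wherever control flow merges on the way.
    static Value *materializeAt(Instruction *Inst, BasicBlock *UseBB) {
      SSAUpdater SSA;
      // Pre-patch, Initialize took Inst itself as a prototype value that
      // supplied both properties; now type and name are passed explicitly.
      SSA.Initialize(Inst->getType(), Inst->getName());
      SSA.AddAvailableValue(Inst->getParent(), Inst);
      return SSA.GetValueInMiddleOfBlock(UseBB);
    }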
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 8e9113871f47b..52f0499f39b0e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -490,6 +490,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { /// rewriting all the predecessors to branch to the successor block and return /// true. If we can't transform, return false. bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { + assert(BB != &BB->getParent()->getEntryBlock() && + "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); + // We can't eliminate infinite loops. BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 4f4edf3a754c1..b3c4801a4f15b 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -46,9 +46,9 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" @@ -65,27 +65,30 @@ STATISTIC(NumNested , "Number of nested loops split out"); namespace { struct LoopSimplify : public LoopPass { static char ID; // Pass identification, replacement for typeid - LoopSimplify() : LoopPass(&ID) {} + LoopSimplify() : LoopPass(ID) {} // AA - If we have an alias analysis object to update, this is it, otherwise // this is null. AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; + ScalarEvolution *SE; Loop *L; virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // We need loop information to identify the loops... - AU.addRequiredTransitive<LoopInfo>(); - AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<LoopInfo>(); AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominatorTree>(); - AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<DominanceFrontier>(); + AU.addPreservedID(LCSSAID); } /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. @@ -104,11 +107,11 @@ namespace { } char LoopSimplify::ID = 0; -static RegisterPass<LoopSimplify> -X("loopsimplify", "Canonicalize natural loops", true); +INITIALIZE_PASS(LoopSimplify, "loopsimplify", + "Canonicalize natural loops", true, false); // Publically exposed interface to pass... 
-const PassInfo *const llvm::LoopSimplifyID = &X; +char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// runOnLoop - Run down all loops in the CFG (recursively, but we could do @@ -120,6 +123,7 @@ bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); AA = getAnalysisIfAvailable<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); Changed |= ProcessLoop(L, LPM); @@ -141,15 +145,16 @@ ReprocessLoop: BB != E; ++BB) { if (*BB == L->getHeader()) continue; - SmallPtrSet<BasicBlock *, 4> BadPreds; - for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){ + SmallPtrSet<BasicBlock*, 4> BadPreds; + for (pred_iterator PI = pred_begin(*BB), + PE = pred_end(*BB); PI != PE; ++PI) { BasicBlock *P = *PI; if (!L->contains(P)) BadPreds.insert(P); } // Delete each unique out-of-loop (and thus dead) predecessor. - for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), + for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(), E = BadPreds.end(); I != E; ++I) { DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "; @@ -530,6 +535,12 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], OuterLoopPreds.size(), @@ -619,6 +630,11 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; + + // Indirectbr edges cannot be split, so we must fail if we find one. + if (isa<IndirectBrInst>(P->getTerminator())) + return 0; + if (P != Preheader) BackedgeBlocks.push_back(P); } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e0e07e7bbc821..236bbe9057bfc 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -127,6 +128,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) return false; } + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. 
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>()) + SE->forgetLoop(L); + // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); // Find trip multiple if count is not available diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 2696e6913f3bf..a46dd8402aca7 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -78,14 +78,14 @@ namespace { static char ID; // Pass identification, replacement for typeid explicit LowerInvoke(const TargetLowering *tli = NULL, bool useExpensiveEHSupport = ExpensiveEHSupport) - : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport), + : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // This is a cluster of orthogonal Transforms - AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreserved("mem2reg"); AU.addPreservedID(LowerSwitchID); } @@ -100,10 +100,11 @@ namespace { } char LowerInvoke::ID = 0; -static RegisterPass<LowerInvoke> -X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators"); +INITIALIZE_PASS(LowerInvoke, "lowerinvoke", + "Lower invoke and unwind, for unwindless code generators", + false, false); -const PassInfo *const llvm::LowerInvokePassID = &X; +char &llvm::LowerInvokePassID = LowerInvoke::ID; // Public Interface To the LowerInvoke pass. FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 468a5fe4c5e5d..5530b4700aac6 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -29,19 +29,18 @@ using namespace llvm; namespace { /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch - /// instructions. Note that this cannot be a BasicBlock pass because it - /// modifies the CFG! + /// instructions. class LowerSwitch : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - LowerSwitch() : FunctionPass(&ID) {} + LowerSwitch() : FunctionPass(ID) {} virtual bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { // This is a cluster of orthogonal Transforms AU.addPreserved<UnifyFunctionExitNodes>(); - AU.addPreservedID(PromoteMemoryToRegisterID); + AU.addPreserved("mem2reg"); AU.addPreservedID(LowerInvokePassID); } @@ -50,8 +49,7 @@ namespace { Constant* High; BasicBlock* BB; - CaseRange() : Low(0), High(0), BB(0) { } - CaseRange(Constant* low, Constant* high, BasicBlock* bb) : + CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : Low(low), High(high), BB(bb) { } }; @@ -81,11 +79,11 @@ namespace { } char LowerSwitch::ID = 0; -static RegisterPass<LowerSwitch> -X("lowerswitch", "Lower SwitchInst's to branches"); +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false); // Publically exposed interface to pass... -const PassInfo *const llvm::LowerSwitchID = &X; +char &llvm::LowerSwitchID = LowerSwitch::ID; // createLowerSwitchPass - Interface to this file... 
FunctionPass *llvm::createLowerSwitchPass() { return new LowerSwitch(); diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 99203b662120e..101645bd92b77 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -27,7 +27,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { struct PromotePass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - PromotePass() : FunctionPass(&ID) {} + PromotePass() : FunctionPass(ID) {} // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. @@ -49,7 +49,8 @@ namespace { } // end of anonymous namespace char PromotePass::ID = 0; -static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register"); +INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register", + false, false); bool PromotePass::runOnFunction(Function &F) { std::vector<AllocaInst*> Allocas; @@ -81,8 +82,6 @@ bool PromotePass::runOnFunction(Function &F) { return Changed; } -// Publically exposed interface to pass... -const PassInfo *const llvm::PromoteMemoryToRegisterID = &X; // createPromoteMemoryToRegister - Provide an entry point to create this pass. // FunctionPass *llvm::createPromoteMemoryToRegisterPass() { diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index c0de1938b2db3..a4e3029e3a5a7 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -228,14 +228,6 @@ namespace { void run(); - /// properlyDominates - Return true if I1 properly dominates I2. - /// - bool properlyDominates(Instruction *I1, Instruction *I2) const { - if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) - I1 = II->getNormalDest()->begin(); - return DT.properlyDominates(I1->getParent(), I2->getParent()); - } - /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. /// bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { @@ -896,11 +888,12 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DIVar, SI); // Propagate any debug metadata from the store onto the dbg.value. - if (MDNode *SIMD = SI->getMetadata("dbg")) - DbgVal->setMetadata("dbg", SIMD); + DebugLoc SIDL = SI->getDebugLoc(); + if (!SIDL.isUnknown()) + DbgVal->setDebugLoc(SIDL); // Otherwise propagate debug metadata from dbg.declare. - else if (MDNode *MD = DDI->getMetadata("dbg")) - DbgVal->setMetadata("dbg", MD); + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); } // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index f4bdb527655ab..c855988307ea7 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -29,20 +29,21 @@ static AvailableValsTy &getAvailableVals(void *AV) { } SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {} + : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); } /// Initialize - Reset this object to get ready for a new set of SSA -/// updates. ProtoValue is the value used to name PHI nodes. -void SSAUpdater::Initialize(Value *ProtoValue) { +/// updates with type 'Ty'. PHI nodes get a name based on 'Name'. 
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) { if (AV == 0) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); - PrototypeValue = ProtoValue; + ProtoType = Ty; + ProtoName = Name; } /// HasValueForBlock - Return true if the SSAUpdater already has a value for @@ -54,8 +55,8 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { /// AddAvailableValue - Indicate that a rewritten value is available in the /// specified block with the specified value. void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(PrototypeValue != 0 && "Need to initialize SSAUpdater"); - assert(PrototypeValue->getType() == V->getType() && + assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; } @@ -148,7 +149,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // If there are no predecessors, just return undef. if (PredValues.empty()) - return UndefValue::get(PrototypeValue->getType()); + return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. if (SingularValue != 0) @@ -168,9 +169,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), - &BB->front()); + PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front()); InsertedPHI->reserveOperandSpace(PredValues.size()); // Fill in all the predecessors of the PHI. @@ -205,6 +204,22 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } +/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However, +/// this version of the method can rewrite uses in the same block as a +/// definition, because it assumes that all uses of a value are below any +/// inserted values. +void SSAUpdater::RewriteUseAfterInsertions(Use &U) { + Instruction *User = cast<Instruction>(U.getUser()); + + Value *V; + if (PHINode *UserPN = dyn_cast<PHINode>(User)) + V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); + else + V = GetValueAtEndOfBlock(User->getParent()); + + U.set(V); +} + /// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator /// in the SSAUpdaterImpl template. namespace { @@ -266,15 +281,14 @@ public: /// GetUndefVal - Get an undefined value of the same type as the value /// being handled. static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { - return UndefValue::get(Updater->PrototypeValue->getType()); + return UndefValue::get(Updater->ProtoType); } /// CreateEmptyPHI - Create a new PHI instruction in the specified block. /// Reserve space for the operands but do not fill them in yet. static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(), - Updater->PrototypeValue->getName(), + PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName, &BB->front()); PHI->reserveOperandSpace(NumPreds); return PHI; diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp deleted file mode 100644 index 4e813ddf95c7d..0000000000000 --- a/lib/Transforms/Utils/SSI.cpp +++ /dev/null @@ -1,432 +0,0 @@ -//===------------------- SSI.cpp - Creates SSI Representation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
deleted file mode 100644
index 4e813ddf95c7d..0000000000000
--- a/lib/Transforms/Utils/SSI.cpp
+++ /dev/null
@@ -1,432 +0,0 @@
-//===------------------- SSI.cpp - Creates SSI Representation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts a list of variables to the Static Single Information
-// form. This is a program representation described by Scott Ananian in his
-// Master Thesis: "The Static Single Information Form (1999)".
-// We are building an on-demand representation, that is, we do not convert
-// every single variable in the target function to SSI form. Rather, we receive
-// a list of target variables that must be converted. We also do not
-// completely convert a target variable to the SSI format. Instead, we only
-// change the variable in the points where new information can be attached
-// to its live range, that is, at branch points.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ssi"
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-
-using namespace llvm;
-
-static const std::string SSI_PHI = "SSI_phi";
-static const std::string SSI_SIG = "SSI_sigma";
-
-STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
-STATISTIC(NumPhiInserted, "Number of phi functions inserted");
-
-void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredTransitive<DominanceFrontier>();
-  AU.addRequiredTransitive<DominatorTree>();
-  AU.setPreservesAll();
-}
-
-bool SSI::runOnFunction(Function &F) {
-  DT_ = &getAnalysis<DominatorTree>();
-  return false;
-}
-
-/// This method creates the SSI representation for the list of values
-/// received. It will only create SSI representation if a value is used
-/// to decide a branch. Repeated values are created only once.
-///
-void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
-  init(value);
-
-  SmallPtrSet<Instruction*, 4> needConstruction;
-  for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I)
-    if (created.insert(*I))
-      needConstruction.insert(*I);
-
-  insertSigmaFunctions(needConstruction);
-
-  // Test if there is a need to transform to SSI
-  if (!needConstruction.empty()) {
-    insertPhiFunctions(needConstruction);
-    renameInit(needConstruction);
-    rename(DT_->getRoot());
-    fixPhis();
-  }
-
-  clean();
-}
-
-/// Insert sigma functions (a sigma function is a phi function with one
-/// operand)
-///
-void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
-  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I) {
-    for (Value::use_iterator begin = (*I)->use_begin(),
-         end = (*I)->use_end(); begin != end; ++begin) {
-      // Test if the Use of the Value is in a comparator
-      if (CmpInst *CI = dyn_cast<CmpInst>(begin)) {
-        // Iterates through all uses of CmpInst
-        for (Value::use_iterator begin_ci = CI->use_begin(),
-             end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
-          // Test if any use of CmpInst is in a Terminator
-          if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) {
-            insertSigma(TI, *I);
-          }
-        }
-      }
-    }
-  }
-}
-
-/// Inserts Sigma Functions in every BasicBlock successor to Terminator
-/// Instruction TI. All inserted Sigma Functions are related to Instruction I.
-///
-void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
-  // Basic Block of the Terminator Instruction
-  BasicBlock *BB = TI->getParent();
-  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
-    // Next Basic Block
-    BasicBlock *BB_next = TI->getSuccessor(i);
-    if (BB_next != BB &&
-        BB_next->getSinglePredecessor() != NULL &&
-        dominateAny(BB_next, I)) {
-      PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
-      PN->addIncoming(I, BB);
-      sigmas[PN] = I;
-      created.insert(PN);
-      defsites[I].push_back(BB_next);
-      ++NumSigmaInserted;
-    }
-  }
-}
-
-/// Insert phi functions when necessary
-///
-void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
-  DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
-  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I) {
-    // Test if there were any sigmas for this variable
-    SmallPtrSet<BasicBlock *, 16> BB_visited;
-
-    // Insert phi functions if there is any sigma function
-    while (!defsites[*I].empty()) {
-
-      BasicBlock *BB = defsites[*I].back();
-
-      defsites[*I].pop_back();
-      DominanceFrontier::iterator DF_BB = DF->find(BB);
-
-      // The BB is unreachable. Skip it.
-      if (DF_BB == DF->end())
-        continue;
-
-      // Iterates through all the dominance frontier of BB
-      for (std::set<BasicBlock *>::iterator DF_BB_begin =
-           DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
-           DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
-        BasicBlock *BB_dominated = *DF_BB_begin;
-
-        // Test if we have not yet visited this node and if the
-        // original definition dominates this node
-        if (BB_visited.insert(BB_dominated) &&
-            DT_->properlyDominates(value_original[*I], BB_dominated) &&
-            dominateAny(BB_dominated, *I)) {
-          PHINode *PN = PHINode::Create(
-              (*I)->getType(), SSI_PHI, BB_dominated->begin());
-          phis.insert(std::make_pair(PN, *I));
-          created.insert(PN);
-
-          defsites[*I].push_back(BB_dominated);
-          ++NumPhiInserted;
-        }
-      }
-    }
-    BB_visited.clear();
-  }
-}
-
-/// Some initialization for the rename part
-///
-void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
-  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I)
-    value_stack[*I].push_back(*I);
-}
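The rename() walk that follows is the classic stack-of-definitions scheme from SSA construction: push a new version at each definition, resolve uses against the stack top, recurse over dominated blocks, and pop on the way back up. A self-contained toy model of that walk (all types and names here are invented for illustration, not taken from the pass):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// One node per basic block: Defs lists variables this block redefines,
// Uses lists variables it reads, Kids are dominator-tree children.
struct Block {
  std::vector<std::string> Defs;
  std::vector<std::string> Uses;
  std::vector<Block*> Kids;
  int Id;
};

// For each variable, a stack of version numbers; the top is the definition
// that reaches the current point of the walk.
static std::map<std::string, std::vector<int> > Stacks;
static int Counter = 0;

static void renameWalk(Block *B) {
  for (size_t i = 0; i < B->Uses.size(); ++i)
    std::cout << "block " << B->Id << " reads " << B->Uses[i] << '#'
              << Stacks[B->Uses[i]].back() << '\n';

  std::vector<std::string> Pushed;           // remember what to pop later
  for (size_t i = 0; i < B->Defs.size(); ++i) {
    Stacks[B->Defs[i]].push_back(++Counter);  // new version becomes the top
    Pushed.push_back(B->Defs[i]);
  }

  for (size_t i = 0; i < B->Kids.size(); ++i)
    renameWalk(B->Kids[i]);                  // recurse into dominated blocks

  for (size_t i = 0; i < Pushed.size(); ++i)
    Stacks[Pushed[i]].pop_back();            // restore state on the way out
}

int main() {
  Block Entry = { {"x"}, {}, {}, 0 };
  Block Then  = { {"x"}, {"x"}, {}, 1 };
  Block Join  = { {}, {"x"}, {}, 2 };
  Entry.Kids.push_back(&Then);
  Entry.Kids.push_back(&Join);
  Stacks["x"].push_back(0);                  // version 0 = incoming value
  renameWalk(&Entry);
  return 0;
}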
-/// Renames all variables in the specified BasicBlock.
-/// Only variables that need to be renamed will be.
-///
-void SSI::rename(BasicBlock *BB) {
-  SmallPtrSet<Instruction*, 8> defined;
-
-  // Iterate through instructions and make appropriate renaming.
-  // For SSI_PHI (b = PHI()), store b at value_stack as a new
-  // definition of the variable it represents.
-  // For SSI_SIG (b = PHI(a)), substitute a with the current
-  // value of a, present in the value_stack.
-  // Then store b in the value_stack as the new definition of a.
-  // For all other instructions (b = OP(a, c, d, ...)), we need to substitute
-  // all operands with their current values, present in value_stack.
-  for (BasicBlock::iterator begin = BB->begin(), end = BB->end();
-       begin != end; ++begin) {
-    Instruction *I = begin;
-    if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
-      Instruction* position;
-
-      // Treat SSI_PHI
-      if ((position = getPositionPhi(PN))) {
-        value_stack[position].push_back(PN);
-        defined.insert(position);
-      // Treat SSI_SIG
-      } else if ((position = getPositionSigma(PN))) {
-        substituteUse(I);
-        value_stack[position].push_back(PN);
-        defined.insert(position);
-      }
-
-      // Treat all other PHI functions
-      else {
-        substituteUse(I);
-      }
-    }
-
-    // Treat all other functions
-    else {
-      substituteUse(I);
-    }
-  }
-
-  // This loop iterates in all BasicBlocks that are successors of the current
-  // BasicBlock. For each SSI_PHI instruction found, insert an operand.
-  // This operand is the current operand in value_stack for the variable
-  // in "position". And the BasicBlock this operand represents is the current
-  // BasicBlock.
-  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
-    BasicBlock *BB_succ = *SI;
-
-    for (BasicBlock::iterator begin = BB_succ->begin(),
-         notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
-      Instruction *I = begin;
-      PHINode *PN = dyn_cast<PHINode>(I);
-      Instruction* position;
-      if (PN && ((position = getPositionPhi(PN)))) {
-        PN->addIncoming(value_stack[position].back(), BB);
-      }
-    }
-  }
-
-  // This loop calls rename on all children from this block. This time children
-  // refers to a successor block in the dominance tree.
-  DomTreeNode *DTN = DT_->getNode(BB);
-  for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end();
-       begin != end; ++begin) {
-    DomTreeNodeBase<BasicBlock> *DTN_children = *begin;
-    BasicBlock *BB_children = DTN_children->getBlock();
-    rename(BB_children);
-  }
-
-  // Now we remove all inserted definitions of a variable from the top of
-  // the stack leaving the previous one as the top.
-  for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
-       DE = defined.end(); DI != DE; ++DI)
-    value_stack[*DI].pop_back();
-}
-
-/// Substitute any use in this instruction for the last definition of
-/// the variable
-///
-void SSI::substituteUse(Instruction *I) {
-  for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
-    Value *operand = I->getOperand(i);
-    for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
-         VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
-      if (operand == VI->second.front() &&
-          I != VI->second.back()) {
-        PHINode *PN_I = dyn_cast<PHINode>(I);
-        PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
-
-        // If a phi created in a BasicBlock is used as an operand of another
-        // created in the same BasicBlock, this step marks this second phi,
-        // to fix this issue later. It cannot be fixed now, because the
-        // operands of the first phi are not final yet.
-        if (PN_I && PN_vs &&
-            VI->second.back()->getParent() == I->getParent()) {
-
-          phisToFix.insert(PN_I);
-        }
-
-        I->setOperand(i, VI->second.back());
-        break;
-      }
-    }
-  }
-}
-
-/// Test if the BasicBlock BB dominates any use or definition of value.
-/// If it dominates a phi instruction that is on the same BasicBlock,
-/// that does not count.
-///
-bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
-  for (Value::use_iterator begin = value->use_begin(),
-       end = value->use_end(); begin != end; ++begin) {
-    Instruction *I = cast<Instruction>(*begin);
-    BasicBlock *BB_father = I->getParent();
-    if (BB == BB_father && isa<PHINode>(I))
-      continue;
-    if (DT_->dominates(BB, BB_father)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-/// When a phi node created in a BasicBlock is used as an operand of another
-/// phi function in the same BasicBlock, LLVM sees this as an error. So for
-/// the second phi, call the first phi P and the BasicBlock it flows in from
-/// B. This P will be replaced by the value it has for BasicBlock B. This
-/// also adds undef values for predecessors that were not included in the phi.
-///
-void SSI::fixPhis() {
-  for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
-       end = phisToFix.end(); begin != end; ++begin) {
-    PHINode *PN = *begin;
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
-      PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
-      if (PN_father && PN->getParent() == PN_father->getParent() &&
-          !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
-        BasicBlock *BB = PN->getIncomingBlock(i);
-        int pos = PN_father->getBasicBlockIndex(BB);
-        PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
-      }
-    }
-  }
-
-  for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
-       end = phis.end(); begin != end; ++begin) {
-    PHINode *PN = begin->first;
-    BasicBlock *BB = PN->getParent();
-    pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-    SmallVector<BasicBlock*, 8> Preds(PI, PE);
-    for (unsigned size = Preds.size();
-         PI != PE && PN->getNumIncomingValues() != size; ++PI) {
-      bool found = false;
-      for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
-           i < pn_end; ++i) {
-        if (PN->getIncomingBlock(i) == *PI) {
-          found = true;
-          break;
-        }
-      }
-      if (!found) {
-        PN->addIncoming(UndefValue::get(PN->getType()), *PI);
-      }
-    }
-  }
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the phis list.
-///
-Instruction* SSI::getPositionPhi(PHINode *PN) {
-  DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
-  if (val == phis.end())
-    return 0;
-  else
-    return val->second;
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the sigmas list.
-///
-Instruction* SSI::getPositionSigma(PHINode *PN) {
-  DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
-  if (val == sigmas.end())
-    return 0;
-  else
-    return val->second;
-}
-
-/// Initializes
-///
-void SSI::init(SmallVectorImpl<Instruction *> &value) {
-  for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
-       E = value.end(); I != E; ++I) {
-    value_original[*I] = (*I)->getParent();
-    defsites[*I].push_back((*I)->getParent());
-  }
-}
-
-/// Clean all used resources in this creation of SSI
-///
-void SSI::clean() {
-  phis.clear();
-  sigmas.clear();
-  phisToFix.clear();
-
-  defsites.clear();
-  value_stack.clear();
-  value_original.clear();
-}
-
-/// createSSIPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIPass() { return new SSI(); }
-
-char SSI::ID = 0;
-static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
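A sigma function, as inserted by the removed pass above, is nothing more than a single-operand phi placed at the top of a branch successor, so branch-derived facts (for example, x != 0 on the taken edge) can attach to a fresh name. The core insertion step might look like this in isolation (the helper name is invented; header paths are the era's top-level ones):

#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
using namespace llvm;

// Sketch: split the live range of V at each successor of a conditional
// branch by inserting a one-operand phi (a "sigma").
static void insertSigmasFor(BranchInst *Br, Instruction *V) {
  BasicBlock *From = Br->getParent();
  for (unsigned i = 0, e = Br->getNumSuccessors(); i != e; ++i) {
    BasicBlock *Succ = Br->getSuccessor(i);
    // Only safe when this edge is the block's unique entry; otherwise the
    // "sigma" would really be merging values from several predecessors.
    if (Succ == From || Succ->getSinglePredecessor() == 0)
      continue;
    PHINode *Sigma = PHINode::Create(V->getType(), "sigma", Succ->begin());
    Sigma->addIncoming(V, From);
    // Uses of V dominated by Succ would then be rewritten to use Sigma.
  }
}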
-/// SSIEverything - A pass that runs createSSI on every non-void variable,
-/// intended for debugging.
-namespace {
-  struct SSIEverything : public FunctionPass {
-    static char ID; // Pass identification, replacement for typeid
-    SSIEverything() : FunctionPass(&ID) {}
-
-    bool runOnFunction(Function &F);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<SSI>();
-    }
-  };
-}
-
-bool SSIEverything::runOnFunction(Function &F) {
-  SmallVector<Instruction *, 16> Insts;
-  SSI &ssi = getAnalysis<SSI>();
-
-  if (F.isDeclaration() || F.isIntrinsic()) return false;
-
-  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
-    for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
-      if (!I->getType()->isVoidTy())
-        Insts.push_back(I);
-
-  ssi.createSSI(Insts);
-  return true;
-}
-
-/// createSSIEverythingPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
-
-char SSIEverything::ID = 0;
-static RegisterPass<SSIEverything>
-Y("ssi-everything", "Static Single Information Construction");
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 27b07d9731a58..28d7afbf1c33e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -949,7 +949,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
        UI != E; ++UI) {
     // Ignore any user that is not a PHI node in BB2.  These can only occur in
     // unreachable blocks, because they would not be dominated by the instr.
-    PHINode *PN = dyn_cast<PHINode>(UI);
+    PHINode *PN = dyn_cast<PHINode>(*UI);
     if (!PN || PN->getParent() != BB2)
       return false;
     PHIUses.push_back(PN);
@@ -1724,12 +1724,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
   assert(BB && BB->getParent() && "Block not embedded in function!");
   assert(BB->getTerminator() && "Degenerate basic block encountered!");
 
-  assert(&BB->getParent()->getEntryBlock() != BB &&
-         "Can't Simplify entry block!");
-
-  // Remove basic blocks that have no predecessors... or that just have themself
-  // as a predecessor.  These are unreachable.
-  if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
+  // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themself as a predecessor.  These are unreachable.
+  if ((pred_begin(BB) == pred_end(BB) &&
+       &BB->getParent()->getEntryBlock() != BB) ||
+      BB->getSinglePredecessor() == BB) {
     DEBUG(dbgs() << "Removing BB: \n" << *BB);
     DeleteDeadBlock(BB);
     return true;
@@ -1880,8 +1880,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
       while (isa<DbgInfoIntrinsic>(BBI))
         ++BBI;
       if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
-        if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
-          return true;
+        if (BB != &BB->getParent()->getEntryBlock())
+          if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+            return true;
 
     } else {  // Conditional branch
       if (isValueEqualityComparison(BI)) {
@@ -2049,12 +2050,38 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
       }
 
       // If this block is now dead, remove it.
-      if (pred_begin(BB) == pred_end(BB)) {
+      if (pred_begin(BB) == pred_end(BB) &&
+          BB != &BB->getParent()->getEntryBlock()) {
        // We know there are no successors, so just nuke the block.
        M->getBasicBlockList().erase(BB);
        return true;
      }
    }
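The new indirectbr case that follows walks the destination list with a seen-set and compacts duplicates (and no-longer-address-taken blocks) in place, adjusting the index and bound after each removal. The same compaction idiom in a generic, self-contained form (container and names invented for the example):

#include <unordered_set>
#include <vector>

// Drop repeated entries from a successor list in place, keeping the first
// occurrence of each id; mirrors the seen-set walk over indirectbr
// destinations, including the "retry this index after removal" step.
static void dedupeSuccessors(std::vector<int> &Dests) {
  std::unordered_set<int> Seen;
  for (size_t i = 0; i != Dests.size(); ) {
    if (!Seen.insert(Dests[i]).second)
      Dests.erase(Dests.begin() + i);  // duplicate: remove, stay at i
    else
      ++i;                             // first sighting: keep and advance
  }
}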
+  } else if (IndirectBrInst *IBI =
+               dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+    // Eliminate redundant destinations.
+    SmallPtrSet<Value *, 8> Succs;
+    for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+      BasicBlock *Dest = IBI->getDestination(i);
+      if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+        Dest->removePredecessor(BB);
+        IBI->removeDestination(i);
+        --i; --e;
+        Changed = true;
+      }
+    }
+
+    if (IBI->getNumDestinations() == 0) {
+      // If the indirectbr has no successors, change it to unreachable.
+      new UnreachableInst(IBI->getContext(), IBI);
+      IBI->eraseFromParent();
+      Changed = true;
+    } else if (IBI->getNumDestinations() == 1) {
+      // If the indirectbr has one successor, change it to a direct branch.
+      BranchInst::Create(IBI->getDestination(0), IBI);
+      IBI->eraseFromParent();
+      Changed = true;
+    }
   }
 
   // Merge basic blocks into their predecessor if there is only one distinct
@@ -2068,12 +2095,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
   // is a conditional branch, see if we can hoist any code from this block up
   // into our predecessor.
   pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
-  BasicBlock *OnlyPred = *PI++;
-  for (; PI != PE; ++PI)  // Search all predecessors, see if they are all same
-    if (*PI != OnlyPred) {
+  BasicBlock *OnlyPred = 0;
+  for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
+    if (!OnlyPred)
+      OnlyPred = *PI;
+    else if (*PI != OnlyPred) {
       OnlyPred = 0;       // There are multiple different predecessors...
       break;
     }
+  }
 
   if (OnlyPred)
     if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
@@ -2172,8 +2202,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
 /// of the CFG.  It returns true if a modification was made.
 ///
-/// WARNING:  The entry node of a function may not be simplified.
-///
 bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
   return SimplifyCFGOpt(TD).run(BB);
 }
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 3fa8b70a8505a..a51f1e1a47f65 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -24,8 +24,8 @@ using namespace llvm;
 
 char UnifyFunctionExitNodes::ID = 0;
-static RegisterPass<UnifyFunctionExitNodes>
-X("mergereturn", "Unify function exit nodes");
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+                "Unify function exit nodes", false, false);
 
 Pass *llvm::createUnifyFunctionExitNodesPass() {
   return new UnifyFunctionExitNodes();
@@ -35,7 +35,7 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
   // We preserve the non-critical-edgeness property
   AU.addPreservedID(BreakCriticalEdgesID);
   // This is a cluster of orthogonal Transforms
-  AU.addPreservedID(PromoteMemoryToRegisterID);
+  AU.addPreserved("mem2reg");
   AU.addPreservedID(LowerSwitchID);
 }
 
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 3f6a90c94ebb6..fc4bde77d4f95 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Type.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
@@ -20,28 +20,51 @@
 #include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
+                      bool ModuleLevelChanges) {
   Value *&VMSlot = VM[V];
   if (VMSlot) return VMSlot;      // Does it exist in the map yet?
 
   // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
   // DenseMap.  This includes any recursive calls to MapValue.
 
-  // Global values and non-function-local metadata do not need to be seeded into
-  // the VM if they are using the identity mapping.
+  // Global values do not need to be seeded into the VM if they
+  // are using the identity mapping.
   if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
-      (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
+      (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
+       !ModuleLevelChanges))
     return VMSlot = const_cast<Value*>(V);
 
   if (const MDNode *MD = dyn_cast<MDNode>(V)) {
-    SmallVector<Value*, 4> Elts;
-    for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
-      Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0);
-    return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+    // Start by assuming that we'll use the identity mapping.
+    VMSlot = const_cast<Value*>(V);
+
+    // Check all operands to see if any need to be remapped.
+    for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+      Value *OP = MD->getOperand(i);
+      if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+
+      // Ok, at least one operand needs remapping.
+      MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+      VM[V] = Dummy;
+      SmallVector<Value*, 4> Elts;
+      Elts.reserve(MD->getNumOperands());
+      for (i = 0; i != e; ++i)
+        Elts.push_back(MD->getOperand(i) ?
+                       MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+      MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+      Dummy->replaceAllUsesWith(NewMD);
+      MDNode::deleteTemporary(Dummy);
+      return VM[V] = NewMD;
+    }
+
+    // No operands needed remapping; keep the identity map.
+    return const_cast<Value*>(V);
   }
 
   Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
-  if (C == 0) return 0;
+  if (C == 0)
+    return 0;
 
   if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
       isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
@@ -51,7 +74,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
   if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
     for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
          i != e; ++i) {
-      Value *MV = MapValue(*i, VM);
+      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
       if (MV != *i) {
         // This array must contain a reference to a global, make a new array
         // and return it.
@@ -62,7 +85,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
           Values.push_back(cast<Constant>(*j));
         Values.push_back(cast<Constant>(MV));
         for (++i; i != e; ++i)
-          Values.push_back(cast<Constant>(MapValue(*i, VM)));
+          Values.push_back(cast<Constant>(MapValue(*i, VM,
+                                                   ModuleLevelChanges)));
         return VM[V] = ConstantArray::get(CA->getType(), Values);
       }
     }
@@ -72,7 +96,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
   if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
     for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
          i != e; ++i) {
-      Value *MV = MapValue(*i, VM);
+      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
       if (MV != *i) {
         // This struct must contain a reference to a global, make a new struct
         // and return it.
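The MDNode path above has to survive cyclic metadata graphs: it parks a temporary node in the map before recursing, so a cycle resolves to the placeholder, and then replaces the placeholder with the finished node. A toy model of that placeholder trick, with invented types and names (and the clones deliberately leaked to keep the sketch short):

#include <map>
#include <vector>

// Toy operand graph that may contain cycles. remap() clones it using the
// same trick as the MDNode path: park a placeholder in the map before
// recursing so a cycle resolves to the placeholder, then redirect every
// captured placeholder reference to the finished clone.
struct Node { std::vector<Node*> Ops; };

static Node *remap(Node *N, std::map<Node*, Node*> &Map) {
  std::map<Node*, Node*>::iterator It = Map.find(N);
  if (It != Map.end())
    return It->second;                  // already mapped or in progress

  Node *Dummy = new Node;               // placeholder breaks the cycle
  Map[N] = Dummy;

  Node *Clone = new Node;
  for (size_t i = 0; i != N->Ops.size(); ++i)
    Clone->Ops.push_back(remap(N->Ops[i], Map));

  // "Replace all uses with": any clone that picked up the placeholder
  // while the cycle was open now gets the real node.
  for (std::map<Node*, Node*>::iterator MI = Map.begin(), ME = Map.end();
       MI != ME; ++MI)
    for (size_t i = 0; i != MI->second->Ops.size(); ++i)
      if (MI->second->Ops[i] == Dummy)
        MI->second->Ops[i] = Clone;
  delete Dummy;

  return Map[N] = Clone;
}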
@@ -83,7 +107,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
           Values.push_back(cast<Constant>(*j));
         Values.push_back(cast<Constant>(MV));
         for (++i; i != e; ++i)
-          Values.push_back(cast<Constant>(MapValue(*i, VM)));
+          Values.push_back(cast<Constant>(MapValue(*i, VM,
+                                                   ModuleLevelChanges)));
         return VM[V] = ConstantStruct::get(CS->getType(), Values);
       }
     }
@@ -93,14 +118,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
     std::vector<Constant*> Ops;
     for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
-      Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+      Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
     return VM[V] = CE->getWithOperands(Ops);
   }
 
   if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
     for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
          i != e; ++i) {
-      Value *MV = MapValue(*i, VM);
+      Value *MV = MapValue(*i, VM, ModuleLevelChanges);
       if (MV != *i) {
         // This vector value must contain a reference to a global, make a new
         // vector constant and return it.
@@ -111,7 +136,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
           Values.push_back(cast<Constant>(*j));
         Values.push_back(cast<Constant>(MV));
         for (++i; i != e; ++i)
-          Values.push_back(cast<Constant>(MapValue(*i, VM)));
+          Values.push_back(cast<Constant>(MapValue(*i, VM,
+                                                   ModuleLevelChanges)));
         return VM[V] = ConstantVector::get(Values);
       }
     }
@@ -119,19 +145,33 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
   }
 
   BlockAddress *BA = cast<BlockAddress>(C);
-  Function *F = cast<Function>(MapValue(BA->getFunction(), VM));
-  BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM));
+  Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
+                                        ModuleLevelChanges));
+  BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
+                                                     ModuleLevelChanges));
 
   return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
 }
 
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 ///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+                            bool ModuleLevelChanges) {
+  // Remap operands.
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, VMap);
+    Value *V = MapValue(*op, VMap, ModuleLevelChanges);
     assert(V && "Referenced value not in value map!");
     *op = V;
   }
-}
+
+  // Remap attached metadata.
+  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+  I->getAllMetadata(MDs);
+  for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+       MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+    Value *Old = MI->second;
+    Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+    if (New != Old)
+      I->setMetadata(MI->first, cast<MDNode>(New));
+  }
+}
diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h
deleted file mode 100644
index f4ff643ca03ed..0000000000000
--- a/lib/Transforms/Utils/ValueMapper.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MapValue interface which is used by various parts of
-// the Transforms/Utils library to implement cloning and linking facilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef VALUEMAPPER_H
-#define VALUEMAPPER_H
-
-#include "llvm/ADT/ValueMap.h"
-
-namespace llvm {
-  class Value;
-  class Instruction;
-  typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
-
-  Value *MapValue(const Value *V, ValueToValueMapTy &VM);
-  void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
-} // End llvm namespace
-
-#endif
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 09b8aa507d833..831a9960463d1 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -16,7 +16,7 @@
 
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
@@ -63,8 +63,6 @@ static const Module *getModuleFromVal(const Value *V) {
 
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
     return GV->getParent();
-  if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
-    return NMD->getParent();
   return 0;
 }
 
@@ -230,7 +228,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
          E = STy->element_end(); I != E; ++I) {
       OS << ' ';
       CalcTypeName(*I, TypeStack, OS);
-      if (next(I) == STy->element_end())
+      if (llvm::next(I) == STy->element_end())
         OS << ' ';
       else
         OS << ',';
@@ -240,21 +238,6 @@ void TypePrinting::CalcTypeName(const Type *Ty,
     OS << '>';
     break;
   }
-  case Type::UnionTyID: {
-    const UnionType *UTy = cast<UnionType>(Ty);
-    OS << "union {";
-    for (StructType::element_iterator I = UTy->element_begin(),
-         E = UTy->element_end(); I != E; ++I) {
-      OS << ' ';
-      CalcTypeName(*I, TypeStack, OS);
-      if (next(I) == UTy->element_end())
-        OS << ' ';
-      else
-        OS << ',';
-    }
-    OS << '}';
-    break;
-  }
   case Type::PointerTyID: {
     const PointerType *PTy = cast<PointerType>(Ty);
     CalcTypeName(PTy->getElementType(), TypeStack, OS);
@@ -581,8 +564,12 @@ static SlotTracker *createSlotTracker(const Value *V) {
   if (const Function *Func = dyn_cast<Function>(V))
     return new SlotTracker(Func);
 
-  if (isa<MDNode>(V))
+  if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+    if (!MD->isFunctionLocal())
+      return new SlotTracker(MD->getFunction());
+
     return new SlotTracker((Function *)0);
+  }
 
   return 0;
 }
@@ -634,10 +621,8 @@ void SlotTracker::processModule() {
        I = TheModule->named_metadata_begin(),
        E = TheModule->named_metadata_end(); I != E; ++I) {
     const NamedMDNode *NMD = I;
-    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
-      if (MDNode *MD = NMD->getOperand(i))
-        CreateMetadataSlot(MD);
-    }
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      CreateMetadataSlot(NMD->getOperand(i));
   }
 
   // Add all the unnamed functions to the table.
@@ -778,15 +763,14 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
 
   // Don't insert if N is a function-local metadata, these are always printed
   // inline.
-  if (N->isFunctionLocal())
-    return;
-
-  mdn_iterator I = mdnMap.find(N);
-  if (I != mdnMap.end())
-    return;
+  if (!N->isFunctionLocal()) {
+    mdn_iterator I = mdnMap.find(N);
+    if (I != mdnMap.end())
+      return;
 
-  unsigned DestSlot = mdnNext++;
-  mdnMap[N] = DestSlot;
+    unsigned DestSlot = mdnNext++;
+    mdnMap[N] = DestSlot;
+  }
 
   // Recursively add any MDNodes referenced by operands.
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -800,7 +784,8 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
 
 static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
                                    TypePrinting *TypePrinter,
-                                   SlotTracker *Machine);
+                                   SlotTracker *Machine,
+                                   const Module *Context);
 
@@ -856,7 +841,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
 
 static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
                                   TypePrinting &TypePrinter,
-                                  SlotTracker *Machine) {
+                                  SlotTracker *Machine,
+                                  const Module *Context) {
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
     if (CI->getType()->isIntegerTy(1)) {
       Out << (CI->getZExtValue() ? "true" : "false");
@@ -972,9 +958,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
 
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
     Out << "blockaddress(";
-    WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine);
+    WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+                           Context);
     Out << ", ";
-    WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine);
+    WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+                           Context);
     Out << ")";
     return;
   }
@@ -994,12 +982,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
       TypePrinter.print(ETy, Out);
       Out << ' ';
       WriteAsOperandInternal(Out, CA->getOperand(0),
-                             &TypePrinter, Machine);
+                             &TypePrinter, Machine,
+                             Context);
       for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
         Out << ", ";
         TypePrinter.print(ETy, Out);
         Out << ' ';
-        WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine);
+        WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+                               Context);
       }
     }
     Out << ']';
@@ -1017,14 +1007,16 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
 
     TypePrinter.print(CS->getOperand(0)->getType(), Out);
     Out << ' ';
-    WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine);
+    WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+                           Context);
 
     for (unsigned i = 1; i < N; i++) {
       Out << ", ";
       TypePrinter.print(CS->getOperand(i)->getType(), Out);
      Out << ' ';
-      WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine);
+      WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+                             Context);
     }
     Out << ' ';
   }
@@ -1035,15 +1027,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     return;
   }
 
-  if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) {
-    Out << "{ ";
-    TypePrinter.print(CU->getOperand(0)->getType(), Out);
-    Out << ' ';
-    WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine);
-    Out << " }";
-    return;
-  }
-
   if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
     const Type *ETy = CP->getType()->getElementType();
     assert(CP->getNumOperands() > 0 &&
     Out << '<';
     TypePrinter.print(ETy, Out);
     Out << ' ';
-    WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine);
+    WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
+                           Context);
     for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
       Out << ", ";
       TypePrinter.print(ETy, Out);
       Out << ' ';
-      WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine);
+      WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
+                             Context);
     }
     Out << '>';
     return;
@@ -1087,7 +1072,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
       TypePrinter.print((*OI)->getType(), Out);
       Out << ' ';
-      WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine);
+      WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
       if (OI+1 != CE->op_end())
         Out << ", ";
     }
@@ -1112,7 +1097,8 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
 
 static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
                                     TypePrinting *TypePrinter,
-                                    SlotTracker *Machine) {
+                                    SlotTracker *Machine,
+                                    const Module *Context) {
   Out << "!{";
   for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
     const Value *V = Node->getOperand(mi);
@@ -1122,7 +1108,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
       TypePrinter->print(V->getType(), Out);
       Out << ' ';
       WriteAsOperandInternal(Out, Node->getOperand(mi),
-                             TypePrinter, Machine);
+                             TypePrinter, Machine, Context);
     }
     if (mi + 1 != me)
       Out << ", ";
@@ -1138,7 +1124,8 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
 ///
 static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
                                    TypePrinting *TypePrinter,
-                                   SlotTracker *Machine) {
+                                   SlotTracker *Machine,
+                                   const Module *Context) {
   if (V->hasName()) {
     PrintLLVMName(Out, V);
     return;
@@ -1147,7 +1134,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
   const Constant *CV = dyn_cast<Constant>(V);
   if (CV && !isa<GlobalValue>(CV)) {
     assert(TypePrinter && "Constants require TypePrinting!");
-    WriteConstantInternal(Out, CV, *TypePrinter, Machine);
+    WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
     return;
   }
 
@@ -1168,12 +1155,16 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
   if (const MDNode *N = dyn_cast<MDNode>(V)) {
     if (N->isFunctionLocal()) {
       // Print metadata inline, not via slot reference number.
-      WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine);
+      WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
       return;
     }
 
-    if (!Machine)
-      Machine = createSlotTracker(V);
+    if (!Machine) {
+      if (N->isFunctionLocal())
+        Machine = new SlotTracker(N->getFunction());
+      else
+        Machine = new SlotTracker(Context);
+    }
     Out << '!' << Machine->getMetadataSlot(N);
     return;
   }
@@ -1227,8 +1218,9 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
 
   // Fast path: Don't construct and populate a TypePrinting object if we
   // won't be needing any types printed.
   if (!PrintType &&
-      (!isa<Constant>(V) || V->hasName() || isa<GlobalValue>(V))) {
-    WriteAsOperandInternal(Out, V, 0, 0);
+      ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+       V->hasName() || isa<GlobalValue>(V))) {
+    WriteAsOperandInternal(Out, V, 0, 0, Context);
     return;
   }
 
@@ -1242,7 +1234,7 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
     Out << ' ';
   }
 
-  WriteAsOperandInternal(Out, V, &TypePrinter, 0);
+  WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
 }
 
 namespace {
@@ -1297,7 +1289,7 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
     TypePrinter.print(Operand->getType(), Out);
     Out << ' ';
   }
-  WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
+  WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
 }
 
 void AssemblyWriter::writeParamOperand(const Value *Operand,
@@ -1314,7 +1306,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
     Out << ' ' << Attribute::getAsString(Attrs);
   Out << ' ';
   // Print the operand
-  WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
+  WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
 }
 
 void AssemblyWriter::printModule(const Module *M) {
@@ -1403,10 +1395,7 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
   Out << "!" << NMD->getName() << " = !{";
   for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
     if (i) Out << ", ";
-    if (MDNode *MD = NMD->getOperand(i))
-      Out << '!' << Machine.getMetadataSlot(MD);
-    else
-      Out << "null";
+    Out << '!' << Machine.getMetadataSlot(NMD->getOperand(i));
   }
   Out << "}\n";
 }
@@ -1421,6 +1410,9 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
   case GlobalValue::LinkerPrivateWeakLinkage:
     Out << "linker_private_weak ";
     break;
+  case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+    Out << "linker_private_weak_def_auto ";
+    break;
   case GlobalValue::InternalLinkage:  Out << "internal ";       break;
   case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce ";     break;
   case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
@@ -1451,7 +1443,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
   if (GV->isMaterializable())
     Out << "; Materializable\n";
 
-  WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine);
+  WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
   Out << " = ";
 
   if (!GV->hasInitializer() && GV->hasExternalLinkage())
@@ -1510,7 +1502,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
     TypePrinter.print(F->getFunctionType(), Out);
     Out << "* ";
 
-    WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
+    WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
   } else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
     TypePrinter.print(GA->getType(), Out);
     Out << ' ';
@@ -1593,7 +1585,7 @@ void AssemblyWriter::printFunction(const Function *F) {
     Out <<  Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
   TypePrinter.print(F->getReturnType(), Out);
   Out << ' ';
-  WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
+  WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
   Out << '(';
   Machine.incorporateFunction(F);
 
@@ -1643,11 +1635,10 @@ void AssemblyWriter::printFunction(const Function *F) {
   if (F->hasGC())
     Out << " gc \"" << F->getGC() << '"';
   if (F->isDeclaration()) {
-    Out << "\n";
+    Out << '\n';
   } else {
     Out << " {";
-
-    // Output all of its basic blocks... for the function
+    // Output all of the function's basic blocks.
     for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
       printBasicBlock(I);
 
@@ -1696,7 +1687,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
     Out.PadToColumn(50);
     Out << "; Error: Block without parent!";
   } else if (BB != &BB->getParent()->getEntryBlock()) {  // Not the entry block?
-    // Output predecessors for the block...
+    // Output predecessors for the block.
     Out.PadToColumn(50);
     Out << ";";
     const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
@@ -1734,13 +1725,6 @@ void AssemblyWriter::printInfoComment(const Value &V) {
     AnnotationWriter->printInfoComment(V, Out);
     return;
   }
-
-  if (V.getType()->isVoidTy()) return;
-
-  Out.PadToColumn(50);
-  Out << "; <";
-  TypePrinter.print(V.getType(), Out);
-  Out << "> [#uses=" << V.getNumUses() << ']';  // Output # uses
 }
 
 // This member is called for each Instruction in a function..
@@ -2029,7 +2013,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       } else {
         Out << ", !<unknown kind #" << Kind << ">";
       }
-      Out << " !" << Machine.getMetadataSlot(InstMD[i].second);
+      Out << ' ';
+      WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+                             TheModule);
     }
   }
   printInfoComment(I);
@@ -2077,7 +2063,7 @@ void AssemblyWriter::writeAllMDNodes() {
 }
 
 void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
-  WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine);
+  WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
   WriteMDNodeComment(Node, Out);
   Out << "\n";
 }
@@ -2093,6 +2079,13 @@ void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
   W.printModule(this);
 }
 
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+  SlotTracker SlotTable(getParent());
+  formatted_raw_ostream OS(ROS);
+  AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+  W.printNamedMDNode(this);
+}
+
 void Type::print(raw_ostream &OS) const {
   if (this == 0) {
     OS << "<null Type>";
@@ -2130,15 +2123,11 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
     SlotTracker SlotTable(F);
     AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
     W.printMDNodeBody(N);
-  } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
-    SlotTracker SlotTable(N->getParent());
-    AssemblyWriter W(OS, SlotTable, N->getParent(), AAW);
-    W.printNamedMDNode(N);
   } else if (const Constant *C = dyn_cast<Constant>(this)) {
     TypePrinting TypePrinter;
     TypePrinter.print(C->getType(), OS);
     OS << ' ';
-    WriteConstantInternal(OS, C, TypePrinter, 0);
+    WriteConstantInternal(OS, C, TypePrinter, 0, 0);
   } else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
              isa<Argument>(this)) {
     WriteAsOperand(OS, this, true, 0);
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index dc39024e39456..9330e141c3412 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -78,6 +78,63 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       NewFn = F;
       return true;
     }
+  } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+    if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
+          Name.compare(14, 5, "vaddl", 5) == 0 ||
+          Name.compare(14, 5, "vsubl", 5) == 0 ||
+          Name.compare(14, 5, "vaddw", 5) == 0 ||
+          Name.compare(14, 5, "vsubw", 5) == 0 ||
+          Name.compare(14, 5, "vmull", 5) == 0 ||
+          Name.compare(14, 5, "vmlal", 5) == 0 ||
+          Name.compare(14, 5, "vmlsl", 5) == 0 ||
+          Name.compare(14, 5, "vabdl", 5) == 0 ||
+          Name.compare(14, 5, "vabal", 5) == 0) &&
+         (Name.compare(19, 2, "s.", 2) == 0 ||
+          Name.compare(19, 2, "u.", 2) == 0)) ||
+
+        (Name.compare(14, 4, "vaba", 4) == 0 &&
+         (Name.compare(18, 2, "s.", 2) == 0 ||
+          Name.compare(18, 2, "u.", 2) == 0)) ||
+
+        (Name.compare(14, 6, "vmovn.", 6) == 0)) {
+
+      // Calls to these are transformed into IR without intrinsics.
+      NewFn = 0;
+      return true;
+    }
+    // Old versions of NEON ld/st intrinsics are missing alignment arguments.
+    bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
+    bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
+    if (isVLd || isVSt) {
+      unsigned NumVecs = Name.at(17) - '0';
+      if (NumVecs == 0 || NumVecs > 4)
+        return false;
+      bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
+      if (!isLaneOp && Name.at(18) != '.')
+        return false;
+      unsigned ExpectedArgs = 2; // for the address and alignment
+      if (isVSt || isLaneOp)
+        ExpectedArgs += NumVecs;
+      if (isLaneOp)
+        ExpectedArgs += 1; // for the lane number
+      unsigned NumP = FTy->getNumParams();
+      if (NumP != ExpectedArgs - 1)
+        return false;
+
+      // Change the name of the old (bad) intrinsic, because
+      // its type is incorrect, but we cannot overload that name.
+      F->setName("");
+
+      // One argument is missing: add the alignment argument.
+      std::vector<const Type*> NewParams;
+      for (unsigned p = 0; p < NumP; ++p)
+        NewParams.push_back(FTy->getParamType(p));
+      NewParams.push_back(Type::getInt32Ty(F->getContext()));
+      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
+                                               NewParams, false);
+      NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
+      return true;
+    }
   }
   break;
 case 'b':
@@ -182,7 +239,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       NewFnName = "llvm.memset.p0i8.i64";
     }
     if (NewFnName) {
-      const FunctionType *FTy = F->getFunctionType();
       NewFn = cast<Function>(M->getOrInsertFunction(NewFnName,
                                             FTy->getReturnType(),
                                             FTy->getParamType(0),
@@ -309,6 +365,73 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
   return Upgraded;
 }
 
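For vaddl and its relatives, UpgradeIntrinsicCall (further below) emits plain IR instead of calling a replacement intrinsic: widen the operands, then use an ordinary add. Reduced to the signed vaddl case, the rewrite has roughly this shape (the helper name is invented; assumes the era's top-level header paths):

#include "llvm/Instructions.h"
using namespace llvm;

// Sketch: rewrite CI = call @llvm.arm.neon.vaddl.s...(a, b) as
//   %a.wide = sext a to <result type>
//   %b.wide = sext b to <result type>
//   %sum    = add %a.wide, %b.wide
// then forward the call's uses to %sum and delete the call.
static void upgradeSignedVaddl(CallInst *CI) {
  Value *A = new SExtInst(CI->getArgOperand(0), CI->getType(), "", CI);
  Value *B = new SExtInst(CI->getArgOperand(1), CI->getType(), "", CI);
  Value *Sum = BinaryOperator::CreateAdd(A, B,
                                         "upgraded." + CI->getName(), CI);
  if (!CI->use_empty())
    CI->replaceAllUsesWith(Sum);
  CI->eraseFromParent();
}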
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+  StringRef Name(GV->getName());
+
+  // We are only upgrading one symbol here.
+  if (Name == ".llvm.eh.catch.all.value") {
+    GV->setName("llvm.eh.catch.all.value");
+    return true;
+  }
+
+  return false;
+}
+
+/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
+/// have vector elements twice as big as one or both source operands, do the
+/// sign- or zero-extension that used to be handled by intrinsics.  The
+/// extended values are returned via V0 and V1.
+static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
+                           Value *&V0, Value *&V1) {
+  Function *F = CI->getCalledFunction();
+  const std::string& Name = F->getName();
+  bool isLong = (Name.at(18) == 'l');
+  bool isSigned = (Name.at(19) == 's');
+
+  if (isSigned) {
+    if (isLong)
+      V0 = new SExtInst(Arg0, CI->getType(), "", CI);
+    else
+      V0 = Arg0;
+    V1 = new SExtInst(Arg1, CI->getType(), "", CI);
+  } else {
+    if (isLong)
+      V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
+    else
+      V0 = Arg0;
+    V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
+  }
+}
+
+/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
+/// or vabal intrinsics, construct a call to a vabd intrinsic.  Examine the
+/// name of the old intrinsic to determine whether to use a signed or unsigned
+/// vabd intrinsic.  Get the type from the old call instruction, adjusted for
+/// half-size vector elements if the old intrinsic was vabdl or vabal.
+static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
+  Function *F = CI->getCalledFunction();
+  const std::string& Name = F->getName();
+  bool isLong = (Name.at(18) == 'l');
+  bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
+
+  Intrinsic::ID intID;
+  if (isSigned)
+    intID = Intrinsic::arm_neon_vabds;
+  else
+    intID = Intrinsic::arm_neon_vabdu;
+
+  const Type *Ty = CI->getType();
+  if (isLong)
+    Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
+
+  Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
+  Value *Operands[2];
+  Operands[0] = Arg0;
+  Operands[1] = Arg1;
+  return CallInst::Create(VABD, Operands, Operands+2,
+                          "upgraded."+CI->getName(), CI);
+}
+
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
 // upgraded intrinsic. All argument and return casting must be provided in
 // order to seamlessly integrate with existing context.
@@ -320,6 +443,60 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
+    // Get the Function's name.
+    const std::string& Name = F->getName();
+
+    // Upgrade ARM NEON intrinsics.
+    if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+      Instruction *NewI;
+      Value *V0, *V1;
+      if (Name.compare(14, 7, "vmovls.", 7) == 0) {
+        NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
+                            "upgraded." + CI->getName(), CI);
+      } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
+        NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
+                            "upgraded." + CI->getName(), CI);
+      } else if (Name.compare(14, 4, "vadd", 4) == 0) {
+        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+        NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
+      } else if (Name.compare(14, 4, "vsub", 4) == 0) {
+        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+        NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
+      } else if (Name.compare(14, 4, "vmul", 4) == 0) {
+        ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+        NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
+      } else if (Name.compare(14, 4, "vmla", 4) == 0) {
+        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
+        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
+        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
+                                         "upgraded."+CI->getName(), CI);
+      } else if (Name.compare(14, 4, "vmls", 4) == 0) {
+        ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
+        Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
+        NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
+                                         "upgraded."+CI->getName(), CI);
+      } else if (Name.compare(14, 4, "vabd", 4) == 0) {
+        NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
+        NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
+      } else if (Name.compare(14, 4, "vaba", 4) == 0) {
+        NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
+        if (Name.at(18) == 'l')
+          NewI = new ZExtInst(NewI, CI->getType(), "", CI);
+        NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
+                                         "upgraded."+CI->getName(), CI);
+      } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
+        NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
+                             "upgraded." + CI->getName(), CI);
+      } else {
+        llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
+      }
+      // Replace any uses of the old CallInst.
+      if (!CI->use_empty())
+        CI->replaceAllUsesWith(NewI);
+      CI->eraseFromParent();
+      return;
+    }
+
     bool isLoadH = false, isLoadL = false, isMovL = false;
     bool isMovSD = false, isShufPD = false;
     bool isUnpckhPD = false, isUnpcklPD = false;
@@ -398,7 +575,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
     } else if (isShufPD) {
       Value *Op1 = CI->getArgOperand(1);
-      unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+      unsigned MaskVal =
+        cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
       Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
       Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
                                       ((MaskVal >> 1) & 1)+2));
@@ -547,7 +725,40 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   }
 
   switch (NewFn->getIntrinsicID()) {
-  default:  llvm_unreachable("Unknown function for CallInst upgrade.");
+  default: llvm_unreachable("Unknown function for CallInst upgrade.");
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    // Add a default alignment argument of 1.
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
+    Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+    CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
+                                       CI->getName(), CI);
+    NewCI->setTailCall(CI->isTailCall());
+    NewCI->setCallingConv(CI->getCallingConv());
+
+    // Handle any uses of the old CallInst.
+    if (!CI->use_empty())
+      // Replace all uses of the old call with the new cast which has the
+      // correct type.
+      CI->replaceAllUsesWith(NewCI);
+
+    // Clean up the old call now that it has been completely upgraded.
+    CI->eraseFromParent();
+    break;
+  }
+
   case Intrinsic::x86_mmx_psll_d:
   case Intrinsic::x86_mmx_psll_q:
   case Intrinsic::x86_mmx_psll_w:
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index c64564b8a1e78..1388c93cce39c 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMCore
   Module.cpp
   Pass.cpp
   PassManager.cpp
+  PassRegistry.cpp
   PrintModulePass.cpp
   Type.cpp
   TypeSymbolTable.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 35672661e4459..9a91dafab2ff7 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -357,22 +357,6 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
     }
   }
 
-  if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
-    unsigned NumElems = UTy->getNumElements();
-    // Check for a union with all members having the same size.
-    Constant *MemberSize =
-      getFoldedSizeOf(UTy->getElementType(0), DestTy, true);
-    bool AllSame = true;
-    for (unsigned i = 1; i != NumElems; ++i)
-      if (MemberSize !=
-          getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) {
-        AllSame = false;
-        break;
-      }
-    if (AllSame)
-      return MemberSize;
-  }
-
   // Pointer size doesn't depend on the pointee type, so canonicalize them
   // to an arbitrary pointee.
   if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -438,24 +422,6 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
     return MemberAlign;
   }
 
-  if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
-    // Union alignment is the maximum alignment of any member.
-    // Without target data, we can't compare much, but we can check to see
-    // if all the members have the same alignment.
-    unsigned NumElems = UTy->getNumElements();
-    // Check for a union with all members having the same alignment.
-    Constant *MemberAlign =
-      getFoldedAlignOf(UTy->getElementType(0), DestTy, true);
-    bool AllSame = true;
-    for (unsigned i = 1; i != NumElems; ++i)
-      if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) {
-        AllSame = false;
-        break;
-      }
-    if (AllSame)
-      return MemberAlign;
-  }
-
   // Pointer alignment doesn't depend on the pointee type, so canonicalize them
   // to an arbitrary pointee.
   if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -909,8 +875,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
     unsigned numOps;
     if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
       numOps = AR->getNumElements();
-    else if (AggTy->isUnionTy())
-      numOps = 1;
     else
       numOps = cast<StructType>(AggTy)->getNumElements();
 
@@ -927,10 +891,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
 
     if (const StructType* ST = dyn_cast<StructType>(AggTy))
       return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
-    if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) {
-      assert(Ops.size() == 1 && "Union can only contain a single value!");
-      return ConstantUnion::get(UT, Ops[0]);
-    }
     return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
   }
 
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 00b009401dccd..16eaca81048bb 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -59,7 +59,6 @@ Constant *Constant::getNullValue(const Type *Ty) {
   case Type::PointerTyID:
     return ConstantPointerNull::get(cast<PointerType>(Ty));
   case Type::StructTyID:
-  case Type::UnionTyID:
   case Type::ArrayTyID:
   case Type::VectorTyID:
     return ConstantAggregateZero::get(Ty);
@@ -526,6 +525,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
 Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
                              bool AddNull) {
   std::vector<Constant*> ElementVals;
+  ElementVals.reserve(Str.size() + size_t(AddNull));
   for (unsigned i = 0; i < Str.size(); ++i)
     ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
 
@@ -586,27 +586,6 @@ Constant* ConstantStruct::get(LLVMContext &Context,
   return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
 }
 
-ConstantUnion::ConstantUnion(const UnionType *T, Constant* V)
-  : Constant(T, ConstantUnionVal,
-             OperandTraits<ConstantUnion>::op_end(this) - 1, 1) {
-  Use *OL = OperandList;
-  assert(T->getElementTypeIndex(V->getType()) >= 0 &&
-      "Initializer for union element isn't a member of union type!");
-  *OL = V;
-}
-
-// ConstantUnion accessors.
-Constant* ConstantUnion::get(const UnionType* T, Constant* V) {
-  LLVMContextImpl* pImpl = T->getContext().pImpl;
-
-  // Create a ConstantAggregateZero value if all elements are zeros...
-  if (!V->isNullValue())
-    return pImpl->UnionConstants.getOrCreate(T, V);
-
-  return ConstantAggregateZero::get(T);
-}
-
-
 ConstantVector::ConstantVector(const VectorType *T,
                                const std::vector<Constant*> &V)
   : Constant(T, ConstantVectorVal,
@@ -723,7 +702,7 @@ bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
   if (getOpcode() != Instruction::GetElementPtr) return false;
 
   gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
-  User::const_op_iterator OI = next(this->op_begin());
+  User::const_op_iterator OI = llvm::next(this->op_begin());
 
   // Skip the first index, as it has no static limit.
   ++GEPI;
@@ -945,8 +924,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
 // Factory Function Implementation
 
 ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
-  assert((Ty->isStructTy() || Ty->isUnionTy()
-       || Ty->isArrayTy() || Ty->isVectorTy()) &&
+  assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
          "Cannot create an aggregate zero of non-aggregate type!");
 
   LLVMContextImpl *pImpl = Ty->getContext().pImpl;
@@ -956,14 +934,14 @@ ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
 
 /// destroyConstant - Remove the constant from the constant table...
/// void ConstantAggregateZero::destroyConstant() { - getType()->getContext().pImpl->AggZeroConstants.remove(this); + getRawType()->getContext().pImpl->AggZeroConstants.remove(this); destroyConstantImpl(); } /// destroyConstant - Remove the constant from the constant table... /// void ConstantArray::destroyConstant() { - getType()->getContext().pImpl->ArrayConstants.remove(this); + getRawType()->getContext().pImpl->ArrayConstants.remove(this); destroyConstantImpl(); } @@ -1027,21 +1005,14 @@ namespace llvm { // destroyConstant - Remove the constant from the constant table... // void ConstantStruct::destroyConstant() { - getType()->getContext().pImpl->StructConstants.remove(this); - destroyConstantImpl(); -} - -// destroyConstant - Remove the constant from the constant table... -// -void ConstantUnion::destroyConstant() { - getType()->getContext().pImpl->UnionConstants.remove(this); + getRawType()->getContext().pImpl->StructConstants.remove(this); destroyConstantImpl(); } // destroyConstant - Remove the constant from the constant table... // void ConstantVector::destroyConstant() { - getType()->getContext().pImpl->VectorConstants.remove(this); + getRawType()->getContext().pImpl->VectorConstants.remove(this); destroyConstantImpl(); } @@ -1082,7 +1053,7 @@ ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) { // destroyConstant - Remove the constant from the constant table... // void ConstantPointerNull::destroyConstant() { - getType()->getContext().pImpl->NullPtrConstants.remove(this); + getRawType()->getContext().pImpl->NullPtrConstants.remove(this); destroyConstantImpl(); } @@ -1097,7 +1068,7 @@ UndefValue *UndefValue::get(const Type *Ty) { // destroyConstant - Remove the constant from the constant table. // void UndefValue::destroyConstant() { - getType()->getContext().pImpl->UndefValueConstants.remove(this); + getRawType()->getContext().pImpl->UndefValueConstants.remove(this); destroyConstantImpl(); } @@ -1131,7 +1102,7 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB) // destroyConstant - Remove the constant from the constant table. // void BlockAddress::destroyConstant() { - getFunction()->getType()->getContext().pImpl + getFunction()->getRawType()->getContext().pImpl ->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock())); getBasicBlock()->AdjustBlockAddressRefCount(-1); destroyConstantImpl(); @@ -1930,7 +1901,7 @@ Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) { // destroyConstant - Remove the constant from the constant table... 
// void ConstantExpr::destroyConstant() { - getType()->getContext().pImpl->ExprConstants.remove(this); + getRawType()->getContext().pImpl->ExprConstants.remove(this); destroyConstantImpl(); } @@ -1971,11 +1942,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); Constant *ToC = cast<Constant>(To); - LLVMContext &Context = getType()->getContext(); - LLVMContextImpl *pImpl = Context.pImpl; + LLVMContextImpl *pImpl = getRawType()->getContext().pImpl; std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup; - Lookup.first.first = getType(); + Lookup.first.first = cast<ArrayType>(getRawType()); Lookup.second = this; std::vector<Constant*> &Values = Lookup.first.second; @@ -2009,7 +1979,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, Constant *Replacement = 0; if (isAllZeros) { - Replacement = ConstantAggregateZero::get(getType()); + Replacement = ConstantAggregateZero::get(getRawType()); } else { // Check to see if we have this array type already. bool Exists; @@ -2060,7 +2030,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!"); std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup; - Lookup.first.first = getType(); + Lookup.first.first = cast<StructType>(getRawType()); Lookup.second = this; std::vector<Constant*> &Values = Lookup.first.second; Values.reserve(getNumOperands()); // Build replacement struct. @@ -2082,14 +2052,13 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, } Values[OperandToUpdate] = ToC; - LLVMContext &Context = getType()->getContext(); - LLVMContextImpl *pImpl = Context.pImpl; + LLVMContextImpl *pImpl = getRawType()->getContext().pImpl; Constant *Replacement = 0; if (isAllZeros) { - Replacement = ConstantAggregateZero::get(getType()); + Replacement = ConstantAggregateZero::get(getRawType()); } else { - // Check to see if we have this array type already. + // Check to see if we have this struct type already. bool Exists; LLVMContextImpl::StructConstantsTy::MapTy::iterator I = pImpl->StructConstants.InsertOrGetItem(Lookup, Exists); @@ -2118,56 +2087,6 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, destroyConstant(); } -void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To, - Use *U) { - assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); - Constant *ToC = cast<Constant>(To); - - assert(U == OperandList && "Union constants can only have one use!"); - assert(getNumOperands() == 1 && "Union constants can only have one use!"); - assert(getOperand(0) == From && "ReplaceAllUsesWith broken!"); - - std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup; - Lookup.first.first = getType(); - Lookup.second = this; - Lookup.first.second = ToC; - - LLVMContext &Context = getType()->getContext(); - LLVMContextImpl *pImpl = Context.pImpl; - - Constant *Replacement = 0; - if (ToC->isNullValue()) { - Replacement = ConstantAggregateZero::get(getType()); - } else { - // Check to see if we have this union type already. - bool Exists; - LLVMContextImpl::UnionConstantsTy::MapTy::iterator I = - pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists); - - if (Exists) { - Replacement = I->second; - } else { - // Okay, the new shape doesn't exist in the system yet. 
Instead of - // creating a new constant union, inserting it, replaceallusesof'ing the - // old with the new, then deleting the old... just update the current one - // in place! - pImpl->UnionConstants.MoveConstantToNewSlot(this, I); - - // Update to the new value. - setOperand(0, ToC); - return; - } - } - - assert(Replacement != this && "I didn't contain From!"); - - // Everyone using this now uses the replacement. - uncheckedReplaceAllUsesWith(Replacement); - - // Delete the old constant! - destroyConstant(); -} - void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); @@ -2180,7 +2099,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Values.push_back(Val); } - Constant *Replacement = get(getType(), Values); + Constant *Replacement = get(cast<VectorType>(getRawType()), Values); assert(Replacement != this && "I didn't contain From!"); // Everyone using this now uses the replacement. @@ -2227,7 +2146,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, &Indices[0], Indices.size()); } else if (isCast()) { assert(getOperand(0) == From && "Cast only has one use!"); - Replacement = ConstantExpr::getCast(getOpcode(), To, getType()); + Replacement = ConstantExpr::getCast(getOpcode(), To, getRawType()); } else if (getOpcode() == Instruction::Select) { Constant *C1 = getOperand(0); Constant *C2 = getOperand(1); diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 2f2fac53f062e..1c04c3e1987e2 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -511,14 +511,6 @@ struct ConstantKeyData<ConstantStruct> { } }; -template<> -struct ConstantKeyData<ConstantUnion> { - typedef Constant* ValType; - static ValType getValType(ConstantUnion *CU) { - return cast<Constant>(CU->getOperand(0)); - } -}; - // ConstantPointerNull does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantPointerNull, PointerType, ValType> { @@ -757,9 +749,13 @@ public: // If this constant is the representative element for its abstract type, // update the AbstractTypeMap so that the representative element is I. - if (C->getType()->isAbstract()) { + // + // This must use getRawType() because if the type is under refinement, we + // will get the refineAbstractType callback below, and we don't want to + // kick union find in on the constant. 
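// A minimal standalone sketch of the pitfall (plain C++; the names here are
// hypothetical, not part of this patch): a table keyed on the stored pointer
// still finds its entry mid-refinement, while a lookup through the
// forwarding pointer would miss it.
#include <cassert>
#include <map>

struct Ty {
  Ty *Forward;                                // set once the type is refined
  Ty() : Forward(0) {}
};
static Ty *resolve(Ty *T) { return T->Forward ? T->Forward : T; }

int main() {
  std::map<Ty*, int> Uniques;
  Ty Old, New;
  Uniques[&Old] = 1;                          // entry made under the raw key
  Old.Forward = &New;                         // refinement begins
  assert(Uniques.count(&Old) == 1);           // raw key: hit
  assert(Uniques.count(resolve(&Old)) == 0);  // resolved key: miss
  return 0;
}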
+ if (C->getRawType()->isAbstract()) { typename AbstractTypeMapTy::iterator ATI = - AbstractTypeMap.find(C->getType()); + AbstractTypeMap.find(cast<DerivedType>(C->getRawType())); assert(ATI != AbstractTypeMap.end() && "Abstract type not in AbstractTypeMap?"); if (ATI->second == OldI) diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index ca1a399fe8aae..5aad19dd2a4ad 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -22,6 +22,7 @@ #include "llvm/TypeSymbolTable.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" +#include "llvm/PassManager.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -155,8 +156,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMFunctionTypeKind; case Type::StructTyID: return LLVMStructTypeKind; - case Type::UnionTyID: - return LLVMUnionTypeKind; case Type::ArrayTyID: return LLVMArrayTypeKind; case Type::PointerTyID: @@ -315,34 +314,6 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } -/*--.. Operations on union types ..........................................--*/ - -LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, - unsigned ElementCount) { - SmallVector<const Type*, 8> Tys; - for (LLVMTypeRef *I = ElementTypes, - *E = ElementTypes + ElementCount; I != E; ++I) - Tys.push_back(unwrap(*I)); - - return wrap(UnionType::get(&Tys[0], Tys.size())); -} - -LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount) { - return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, - ElementCount); -} - -unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) { - return unwrap<UnionType>(UnionTy)->getNumElements(); -} - -void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) { - UnionType *Ty = unwrap<UnionType>(UnionTy); - for (FunctionType::param_iterator I = Ty->element_begin(), - E = Ty->element_end(); I != E; ++I) - *Dest++ = wrap(*I); -} - /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { @@ -488,6 +459,14 @@ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) { return wrap(unwrap<User>(Val)->getOperand(Index)); } +void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) { + unwrap<User>(Val)->setOperand(Index, unwrap(Op)); +} + +int LLVMGetNumOperands(LLVMValueRef Val) { + return unwrap<User>(Val)->getNumOperands(); +} + /*--.. Operations on constants of any type .................................--*/ LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) { @@ -619,10 +598,6 @@ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) { return wrap(ConstantVector::get( unwrap<Constant>(ScalarConstantVals, Size), Size)); } -LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) { - return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val))); -} - /*--.. 
Constant expressions ................................................--*/ LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) { @@ -1060,6 +1035,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMLinkerPrivateLinkage; case GlobalValue::LinkerPrivateWeakLinkage: return LLVMLinkerPrivateWeakLinkage; + case GlobalValue::LinkerPrivateWeakDefAutoLinkage: + return LLVMLinkerPrivateWeakDefAutoLinkage; case GlobalValue::DLLImportLinkage: return LLVMDLLImportLinkage; case GlobalValue::DLLExportLinkage: @@ -1113,6 +1090,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { case LLVMLinkerPrivateWeakLinkage: GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage); break; + case LLVMLinkerPrivateWeakDefAutoLinkage: + GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage); + break; case LLVMDLLImportLinkage: GV->setLinkage(GlobalValue::DLLImportLinkage); break; @@ -1515,6 +1495,14 @@ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) { unwrap(BBRef)->eraseFromParent(); } +void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) { + unwrap(BB)->moveBefore(unwrap(MovePos)); +} + +void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) { + unwrap(BB)->moveAfter(unwrap(MovePos)); +} + /*--.. Operations on instructions ..........................................--*/ LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) { @@ -2223,3 +2211,39 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) { delete unwrap(MemBuf); } + + +/*===-- Pass Manager ------------------------------------------------------===*/ + +LLVMPassManagerRef LLVMCreatePassManager() { + return wrap(new PassManager()); +} + +LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) { + return wrap(new FunctionPassManager(unwrap(M))); +} + +LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) { + return LLVMCreateFunctionPassManagerForModule( + reinterpret_cast<LLVMModuleRef>(P)); +} + +LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { + return unwrap<PassManager>(PM)->run(*unwrap(M)); +} + +LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) { + return unwrap<FunctionPassManager>(FPM)->doInitialization(); +} + +LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) { + return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F)); +} + +LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { + return unwrap<FunctionPassManager>(FPM)->doFinalization(); +} + +void LLVMDisposePassManager(LLVMPassManagerRef PM) { + delete unwrap(PM); +} diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index 10a866fab6226..f3dad824461dd 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallPtrSet.h" @@ -51,8 +52,8 @@ TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>); TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>); char DominatorTree::ID = 0; -static RegisterPass<DominatorTree> -E("domtree", "Dominator Tree Construction", true, true); +INITIALIZE_PASS(DominatorTree, "domtree", + "Dominator Tree Construction", true, true); bool DominatorTree::runOnFunction(Function &F) { 
DT->recalculate(F); @@ -105,8 +106,8 @@ bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{ //===----------------------------------------------------------------------===// char DominanceFrontier::ID = 0; -static RegisterPass<DominanceFrontier> -G("domfrontier", "Dominance Frontier Construction", true, true); +INITIALIZE_PASS(DominanceFrontier, "domfrontier", + "Dominance Frontier Construction", true, true); void DominanceFrontier::verifyAnalysis() const { if (!VerifyDomInfo) return; @@ -122,36 +123,23 @@ void DominanceFrontier::verifyAnalysis() const { // NewBB is split and now it has one successor. Update dominance frontier to // reflect this change. void DominanceFrontier::splitBlock(BasicBlock *NewBB) { - assert(NewBB->getTerminator()->getNumSuccessors() == 1 - && "NewBB should have a single successor!"); + assert(NewBB->getTerminator()->getNumSuccessors() == 1 && + "NewBB should have a single successor!"); BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0); - SmallVector<BasicBlock*, 8> PredBlocks; - for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB); - PI != PE; ++PI) - PredBlocks.push_back(*PI); - - if (PredBlocks.empty()) - // If NewBB does not have any predecessors then it is a entry block. - // In this case, NewBB and its successor NewBBSucc dominates all - // other blocks. - return; - // NewBBSucc inherits original NewBB frontier. DominanceFrontier::iterator NewBBI = find(NewBB); - if (NewBBI != end()) { - DominanceFrontier::DomSetType NewBBSet = NewBBI->second; - DominanceFrontier::DomSetType NewBBSuccSet; - NewBBSuccSet.insert(NewBBSet.begin(), NewBBSet.end()); - addBasicBlock(NewBBSucc, NewBBSuccSet); - } + if (NewBBI != end()) + addBasicBlock(NewBBSucc, NewBBI->second); // If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the - // DF(PredBlocks[0]) without the stuff that the new block does not dominate + // DF(NewBBSucc) without the stuff that the new block does not dominate // a predecessor of. DominatorTree &DT = getAnalysis<DominatorTree>(); - if (DT.dominates(NewBB, NewBBSucc)) { - DominanceFrontier::iterator DFI = find(PredBlocks[0]); + DomTreeNode *NewBBNode = DT.getNode(NewBB); + DomTreeNode *NewBBSuccNode = DT.getNode(NewBBSucc); + if (DT.dominates(NewBBNode, NewBBSuccNode)) { + DominanceFrontier::iterator DFI = find(NewBBSucc); if (DFI != end()) { DominanceFrontier::DomSetType Set = DFI->second; // Filter out stuff in Set that we do not dominate a predecessor of. @@ -160,8 +148,10 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) { bool DominatesPred = false; for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI); PI != E; ++PI) - if (DT.dominates(NewBB, *PI)) + if (DT.dominates(NewBBNode, DT.getNode(*PI))) { DominatesPred = true; + break; + } if (!DominatesPred) Set.erase(SetI++); else @@ -186,50 +176,71 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) { NewDFSet.insert(NewBBSucc); addBasicBlock(NewBB, NewDFSet); } - - // Now we must loop over all of the dominance frontiers in the function, - // replacing occurrences of NewBBSucc with NewBB in some cases. All - // blocks that dominate a block in PredBlocks and contained NewBBSucc in - // their dominance frontier must be updated to contain NewBB instead. - // - for (Function::iterator FI = NewBB->getParent()->begin(), - FE = NewBB->getParent()->end(); FI != FE; ++FI) { - DominanceFrontier::iterator DFI = find(FI); - if (DFI == end()) continue; // unreachable block. 
- - // Only consider nodes that have NewBBSucc in their dominator frontier. - if (!DFI->second.count(NewBBSucc)) continue; - - // Verify whether this block dominates a block in predblocks. If not, do - // not update it. - bool BlockDominatesAny = false; - for (SmallVectorImpl<BasicBlock*>::const_iterator BI = PredBlocks.begin(), - BE = PredBlocks.end(); BI != BE; ++BI) { - if (DT.dominates(FI, *BI)) { - BlockDominatesAny = true; + + // Now update dominance frontiers which either used to contain NewBBSucc + // or which now need to include NewBB. + + // Collect the set of blocks which dominate a predecessor of NewBB or + // NewBBSucc and which don't dominate both. This is an initial + // approximation of the blocks whose dominance frontiers will need updates. + SmallVector<DomTreeNode *, 16> AllPredDoms; + + // Compute the block which dominates both NewBBSucc and NewBB. This is + // the immediate dominator of NewBBSucc unless NewBB dominates NewBBSucc. + // The code below which climbs dominator trees will stop at this point, + // because from this point up, dominance frontiers are unaffected. + DomTreeNode *DominatesBoth = 0; + if (NewBBSuccNode) { + DominatesBoth = NewBBSuccNode->getIDom(); + if (DominatesBoth == NewBBNode) + DominatesBoth = NewBBNode->getIDom(); + } + + // Collect the set of all blocks which dominate a predecessor of NewBB. + SmallPtrSet<DomTreeNode *, 8> NewBBPredDoms; + for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) + for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) { + if (DTN == DominatesBoth) break; + if (!NewBBPredDoms.insert(DTN)) + break; + AllPredDoms.push_back(DTN); } - // If NewBBSucc should not stay in our dominator frontier, remove it. - // We remove it unless there is a predecessor of NewBBSucc that we - // dominate, but we don't strictly dominate NewBBSucc. - bool ShouldRemove = true; - if ((BasicBlock*)FI == NewBBSucc || !DT.dominates(FI, NewBBSucc)) { - // Okay, we know that PredDom does not strictly dominate NewBBSucc. - // Check to see if it dominates any predecessors of NewBBSucc. - for (pred_iterator PI = pred_begin(NewBBSucc), - E = pred_end(NewBBSucc); PI != E; ++PI) - if (DT.dominates(FI, *PI)) { - ShouldRemove = false; - break; - } + // Collect the set of all blocks which dominate a predecessor of NewBBSucc. + SmallPtrSet<DomTreeNode *, 8> NewBBSuccPredDoms; + for (pred_iterator PI = pred_begin(NewBBSucc), + E = pred_end(NewBBSucc); PI != E; ++PI) + for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) { + if (DTN == DominatesBoth) + break; + if (!NewBBSuccPredDoms.insert(DTN)) + break; + if (!NewBBPredDoms.count(DTN)) + AllPredDoms.push_back(DTN); } - - if (ShouldRemove) - removeFromFrontier(DFI, NewBBSucc); - if (BlockDominatesAny && (&*FI == NewBB || !DT.dominates(FI, NewBB))) + + // Visit all relevant dominance frontiers and make any needed updates. + for (SmallVectorImpl<DomTreeNode *>::const_iterator I = AllPredDoms.begin(), + E = AllPredDoms.end(); I != E; ++I) { + DomTreeNode *DTN = *I; + iterator DFI = find((*I)->getBlock()); + + // Only consider nodes that have NewBBSucc in their dominator frontier. + if (DFI == end() || !DFI->second.count(NewBBSucc)) continue; + + // If the block dominates a predecessor of NewBB but does not properly + // dominate NewBB itself, add NewBB to its dominance frontier.
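// For reference, the membership rule this code implements, written out
// directly as a minimal sketch (Block and the two predicates below are
// stand-ins for BasicBlock and the DominatorTree queries, not part of this
// patch): Y is in DF(X) exactly when X dominates some predecessor of Y but
// does not properly dominate Y itself.
#include <vector>

struct Block { std::vector<Block*> Preds; };
bool dominates(Block *A, Block *B);          // assumed: DT.dominates
bool properlyDominates(Block *A, Block *B);  // assumed: DT.properlyDominates

bool inDominanceFrontier(Block *X, Block *Y) {
  if (properlyDominates(X, Y))
    return false;
  for (unsigned i = 0, e = Y->Preds.size(); i != e; ++i)
    if (dominates(X, Y->Preds[i]))
      return true;
  return false;
}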
+ if (NewBBPredDoms.count(DTN) && + !DT.properlyDominates(DTN, NewBBNode)) addToFrontier(DFI, NewBB); + + // If the block does not dominate a predecessor of NewBBSucc or + // properly dominates NewBBSucc itself, remove NewBBSucc from its + // dominance frontier. + if (!NewBBSuccPredDoms.count(DTN) || + DT.properlyDominates(DTN, NewBBSuccNode)) + removeFromFrontier(DFI, NewBBSucc); } } @@ -343,3 +354,7 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const { } } +void DominanceFrontierBase::dump() const { + print(dbgs()); +} + diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index b758eb8702aea..96716eeb349b9 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -102,7 +102,14 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { setVisibility(Src->getVisibility()); } - +void GlobalValue::setAlignment(unsigned Align) { + assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); + assert(Align <= MaximumAlignment && + "Alignment is greater than MaximumAlignment!"); + Alignment = Log2_32(Align) + 1; + assert(getAlignment() == Align && "Alignment representation error!"); +} + //===----------------------------------------------------------------------===// // GlobalVariable Implementation //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index 0d2eca9c3dea6..69f713b2c42c2 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -164,7 +164,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) { StringRef::iterator ConstraintEnd = std::find(I, E, ','); if (ConstraintEnd == I || // Empty constraint like ",," - Info.Parse(std::string(I, ConstraintEnd), Result)) { + Info.Parse(StringRef(I, ConstraintEnd-I), Result)) { Result.clear(); // Erroneous constraint? break; } diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 9792adaaa122d..05bed4c64316f 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -49,8 +49,8 @@ Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps, // Out of line virtual method, so the vtable, etc has a home. Instruction::~Instruction() { assert(Parent == 0 && "Instruction still linked in the program!"); - if (hasMetadata()) - removeAllMetadata(); + if (hasMetadataHashEntry()) + clearMetadataHashEntries(); } diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index c13696f229022..401802ed13d5e 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -33,10 +33,8 @@ using namespace llvm; User::op_iterator CallSite::getCallee() const { Instruction *II(getInstruction()); return isCall() - ? (CallInst::ArgOffset - ? cast</*FIXME: CallInst*/User>(II)->op_begin() - : cast</*FIXME: CallInst*/User>(II)->op_end() - 1) - : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function + ? 
cast<CallInst>(II)->op_end() - 1 // Skip Callee + : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee } //===----------------------------------------------------------------------===// @@ -233,7 +231,7 @@ CallInst::~CallInst() { void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) { assert(NumOperands == NumParams+1 && "NumOperands not set up?"); - Op<ArgOffset -1>() = Func; + Op<-1>() = Func; const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); @@ -246,15 +244,15 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) { assert((i >= FTy->getNumParams() || FTy->getParamType(i) == Params[i]->getType()) && "Calling a function with a bad signature!"); - OperandList[i + ArgOffset] = Params[i]; + OperandList[i] = Params[i]; } } void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) { assert(NumOperands == 3 && "NumOperands not set up?"); - Op<ArgOffset -1>() = Func; - Op<ArgOffset + 0>() = Actual1; - Op<ArgOffset + 1>() = Actual2; + Op<-1>() = Func; + Op<0>() = Actual1; + Op<1>() = Actual2; const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); @@ -273,8 +271,8 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) { void CallInst::init(Value *Func, Value *Actual) { assert(NumOperands == 2 && "NumOperands not set up?"); - Op<ArgOffset -1>() = Func; - Op<ArgOffset + 0>() = Actual; + Op<-1>() = Func; + Op<0>() = Actual; const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); @@ -290,7 +288,7 @@ void CallInst::init(Value *Func, Value *Actual) { void CallInst::init(Value *Func) { assert(NumOperands == 1 && "NumOperands not set up?"); - Op<ArgOffset -1>() = Func; + Op<-1>() = Func; const FunctionType *FTy = cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType()); @@ -893,6 +891,8 @@ AllocaInst::~AllocaInst() { void AllocaInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); + assert(Align <= MaximumAlignment && + "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData(Log2_32(Align) + 1); assert(getAlignment() == Align && "Alignment representation error!"); } @@ -1028,8 +1028,11 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile, void LoadInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); + assert(Align <= MaximumAlignment && + "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | ((Log2_32(Align)+1)<<1)); + assert(getAlignment() == Align && "Alignment representation error!"); } //===----------------------------------------------------------------------===// @@ -1124,8 +1127,11 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, void StoreInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); + assert(Align <= MaximumAlignment && + "Alignment is greater than MaximumAlignment!"); setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | ((Log2_32(Align)+1) << 1)); + assert(getAlignment() == Align && "Alignment representation error!"); } //===----------------------------------------------------------------------===// @@ -1424,9 +1430,24 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return false; const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); - if 
(!isa<Constant>(Mask) || MaskTy == 0 || - !MaskTy->getElementType()->isIntegerTy(32)) + if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32)) + return false; + + // Check to see if Mask is valid. + if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) { + const VectorType *VTy = cast<VectorType>(V1->getType()); + for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) { + if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) { + if (CI->uge(VTy->getNumElements()*2)) + return false; + } else if (!isa<UndefValue>(MV->getOperand(i))) { + return false; + } + } + } + else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask)) return false; + return true; } diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 4d61363b9394d..563c651315a33 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -110,21 +110,18 @@ static bool isValidName(StringRef MDName) { /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. unsigned LLVMContext::getMDKindID(StringRef Name) const { assert(isValidName(Name) && "Invalid MDNode name"); - - unsigned &Entry = pImpl->CustomMDKindNames[Name]; - + // If this is new, assign it its ID. - if (Entry == 0) Entry = pImpl->CustomMDKindNames.size(); - return Entry; + return + pImpl->CustomMDKindNames.GetOrCreateValue( + Name, pImpl->CustomMDKindNames.size()).second; } /// getMDKindNames - Populate the client-supplied SmallVector with the names /// of the custom metadata kinds, indexed by kind ID. void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const { - Names.resize(pImpl->CustomMDKindNames.size()+1); - Names[0] = ""; + Names.resize(pImpl->CustomMDKindNames.size()); for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(), E = pImpl->CustomMDKindNames.end(); I != E; ++I) - // MD Handlers are numbered from 1.
Names[I->second] = I->first(); } diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp index 9e41a08156084..93a075f0fccbb 100644 --- a/lib/VMCore/LLVMContextImpl.cpp +++ b/lib/VMCore/LLVMContextImpl.cpp @@ -57,14 +57,11 @@ LLVMContextImpl::~LLVMContextImpl() { DropReferences()); std::for_each(StructConstants.map_begin(), StructConstants.map_end(), DropReferences()); - std::for_each(UnionConstants.map_begin(), UnionConstants.map_end(), - DropReferences()); std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(), DropReferences()); ExprConstants.freeConstants(); ArrayConstants.freeConstants(); StructConstants.freeConstants(); - UnionConstants.freeConstants(); VectorConstants.freeConstants(); AggZeroConstants.freeConstants(); NullPtrConstants.freeConstants(); diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 4876f5d5075a8..51b2992898c02 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -144,10 +144,6 @@ public: ConstantStruct, true /*largekey*/> StructConstantsTy; StructConstantsTy StructConstants; - typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion> - UnionConstantsTy; - UnionConstantsTy UnionConstants; - typedef ConstantUniqueMap<std::vector<Constant*>, VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; @@ -192,7 +188,6 @@ public: TypeMap<PointerValType, PointerType> PointerTypes; TypeMap<FunctionValType, FunctionType> FunctionTypes; TypeMap<StructValType, StructType> StructTypes; - TypeMap<UnionValType, UnionType> UnionTypes; TypeMap<IntegerValType, IntegerType> IntegerTypes; // Opaque types are not structurally uniqued, so don't use TypeMap. diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 3100d4ac7c9c3..da69c43ff7359 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/SmallString.h" #include "SymbolTableListTraitsImpl.h" +#include "llvm/Support/LeakDetector.h" #include "llvm/Support/ValueHandle.h" using namespace llvm; @@ -186,6 +187,21 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, unsigned NumVals, FunctionLocalness FL, bool Insert) { LLVMContextImpl *pImpl = Context.pImpl; + + // Add all the operand pointers. Note that we don't have to add the + // isFunctionLocal bit because that's implied by the operands. + // Note that if the operands are later nulled out, the node will be + // removed from the uniquing map. + FoldingSetNodeID ID; + for (unsigned i = 0; i != NumVals; ++i) + ID.AddPointer(Vals[i]); + + void *InsertPoint; + MDNode *N = NULL; + + if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) + return N; + bool isFunctionLocal = false; switch (FL) { case FL_Unknown: @@ -206,20 +222,6 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, break; } - FoldingSetNodeID ID; - for (unsigned i = 0; i != NumVals; ++i) - ID.AddPointer(Vals[i]); - ID.AddBoolean(isFunctionLocal); - - void *InsertPoint; - MDNode *N = NULL; - - if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) - return N; - - if (!Insert) - return NULL; - // Coallocate space for the node and Operands together, then placement new. 
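// The co-allocation idiom in isolation (a sketch with hypothetical names,
// not part of this patch): one allocation holds the node header plus its
// trailing operand array, and placement new constructs the header at the
// front of it.
#include <cstdlib>
#include <new>

struct Operand { void *Val; };
struct Node {
  unsigned NumOperands;
  explicit Node(unsigned N) : NumOperands(N) {}
  Operand *op_begin() { return reinterpret_cast<Operand*>(this + 1); }
};

static Node *allocateNode(unsigned NumOps) {
  void *Ptr = std::malloc(sizeof(Node) + NumOps * sizeof(Operand));
  return new (Ptr) Node(NumOps);   // operands live right after the header
}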
void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); @@ -244,15 +246,42 @@ MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals, return getMDNode(Context, Vals, NumVals, FL_Unknown, false); } +MDNode *MDNode::getTemporary(LLVMContext &Context, Value *const *Vals, + unsigned NumVals) { + MDNode *N = (MDNode *)malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); + N = new (N) MDNode(Context, Vals, NumVals, FL_No); + N->setValueSubclassData(N->getSubclassDataFromValue() | + NotUniquedBit); + LeakDetector::addGarbageObject(N); + return N; +} + +void MDNode::deleteTemporary(MDNode *N) { + assert(N->use_empty() && "Temporary MDNode has uses!"); + assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) && + "Deleting a non-temporary uniqued node!"); + assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) && + "Deleting a non-temporary non-uniqued node!"); + assert((N->getSubclassDataFromValue() & NotUniquedBit) && + "Temporary MDNode does not have NotUniquedBit set!"); + assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 && + "Temporary MDNode has DestroyFlag set!"); + LeakDetector::removeGarbageObject(N); + N->destroy(); +} + /// getOperand - Return specified operand. Value *MDNode::getOperand(unsigned i) const { return *getOperandPtr(const_cast<MDNode*>(this), i); } void MDNode::Profile(FoldingSetNodeID &ID) const { + // Add all the operand pointers. Note that we don't have to add the + // isFunctionLocal bit because that's implied by the operands. + // Note that if the operands are later nulled out, the node will be + // removed from the uniquing map. for (unsigned i = 0, e = getNumOperands(); i != e; ++i) ID.AddPointer(getOperand(i)); - ID.AddBoolean(isFunctionLocal()); } void MDNode::setIsNotUniqued() { @@ -301,7 +330,8 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { // If we are dropping an argument to null, we choose to not unique the MDNode // anymore. This commonly occurs during destruction, and uniquing these - // brings little reuse. + // brings little reuse. Also, this means we don't need to include + // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes. if (To == 0) { setIsNotUniqued(); return; @@ -324,59 +354,35 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { // InsertPoint will have been set by the FindNodeOrInsertPos call. pImpl->MDNodeSet.InsertNode(this, InsertPoint); + + // If this MDValue was previously function-local but no longer is, clear + // its function-local flag. + if (isFunctionLocal() && !isFunctionLocalValue(To)) { + bool isStillFunctionLocal = false; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + Value *V = getOperand(i); + if (!V) continue; + if (isFunctionLocalValue(V)) { + isStillFunctionLocal = true; + break; + } + } + if (!isStillFunctionLocal) + setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit); + } } //===----------------------------------------------------------------------===// // NamedMDNode implementation. // -namespace llvm { -// SymbolTableListTraits specialization for MDSymbolTable. 
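// The intended lifecycle for the temporary nodes introduced above, as a
// short usage sketch (2.8-era headers assumed; `example` and its arguments
// are hypothetical): build a placeholder for a forward reference, point
// other metadata at it, then RAUW the real node in and delete the temporary.
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

void example(LLVMContext &Context, Value *Payload) {
  MDNode *Temp = MDNode::getTemporary(Context, &Payload, 1); // never uniqued
  Value *Wrap[] = { Temp };
  MDNode *User = MDNode::get(Context, Wrap, 1);  // refers to the placeholder
  MDNode *Real = MDNode::get(Context, &Payload, 1);
  Temp->replaceAllUsesWith(Real);                // forward reference resolved
  MDNode::deleteTemporary(Temp);                 // requires use_empty()
  (void)User;
}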
-void ilist_traits<NamedMDNode> -::addNodeToList(NamedMDNode *N) { - assert(N->getParent() == 0 && "Value already in a container!!"); - Module *Owner = getListOwner(); - N->setParent(Owner); - MDSymbolTable &ST = Owner->getMDSymbolTable(); - ST.insert(N->getName(), N); -} - -void ilist_traits<NamedMDNode>::removeNodeFromList(NamedMDNode *N) { - N->setParent(0); - Module *Owner = getListOwner(); - MDSymbolTable &ST = Owner->getMDSymbolTable(); - ST.remove(N->getName()); -} -} - -static SmallVector<WeakVH, 4> &getNMDOps(void *Operands) { - return *(SmallVector<WeakVH, 4>*)Operands; -} - -NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N, - MDNode *const *MDs, - unsigned NumMDs, Module *ParentModule) - : Value(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) { - setName(N); - Operands = new SmallVector<WeakVH, 4>(); - - SmallVector<WeakVH, 4> &Node = getNMDOps(Operands); - for (unsigned i = 0; i != NumMDs; ++i) - Node.push_back(WeakVH(MDs[i])); - - if (ParentModule) - ParentModule->getNamedMDList().push_back(this); +static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) { + return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands; } -NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) { - assert(NMD && "Invalid source NamedMDNode!"); - SmallVector<MDNode *, 4> Elems; - Elems.reserve(NMD->getNumOperands()); - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - Elems.push_back(NMD->getOperand(i)); - return new NamedMDNode(NMD->getContext(), NMD->getName().data(), - Elems.data(), Elems.size(), M); +NamedMDNode::NamedMDNode(const Twine &N) + : Name(N.str()), Parent(0), + Operands(new SmallVector<TrackingVH<MDNode>, 4>()) { } NamedMDNode::~NamedMDNode() { @@ -392,18 +398,20 @@ unsigned NamedMDNode::getNumOperands() const { /// getOperand - Return specified operand. MDNode *NamedMDNode::getOperand(unsigned i) const { assert(i < getNumOperands() && "Invalid Operand number!"); - return dyn_cast_or_null<MDNode>(getNMDOps(Operands)[i]); + return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]); } /// addOperand - Add metadata Operand. void NamedMDNode::addOperand(MDNode *M) { - getNMDOps(Operands).push_back(WeakVH(M)); + assert(!M->isFunctionLocal() && + "NamedMDNode operands must not be function-local!"); + getNMDOps(Operands).push_back(TrackingVH<MDNode>(M)); } /// eraseFromParent - Drop all references and remove the node from parent /// module. void NamedMDNode::eraseFromParent() { - getParent()->getNamedMDList().erase(this); + getParent()->eraseNamedMetadata(this); } /// dropAllReferences - Remove all uses and clear node vector. @@ -411,22 +419,6 @@ void NamedMDNode::dropAllReferences() { getNMDOps(Operands).clear(); } -/// setName - Set the name of this named metadata. -void NamedMDNode::setName(const Twine &NewName) { - assert (!NewName.isTriviallyEmpty() && "Invalid named metadata name!"); - - SmallString<256> NameData; - StringRef NameRef = NewName.toStringRef(NameData); - - // Name isn't changing? - if (getName() == NameRef) - return; - - Name = NameRef.str(); - if (Parent) - Parent->getMDSymbolTable().insert(NameRef, this); -} - /// getName - Return a constant reference to this named metadata's name. 
StringRef NamedMDNode::getName() const { return StringRef(Name); } @@ -445,10 +437,6 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const { return getMetadataImpl(getContext().getMDKindID(Kind)); } -void Instruction::setDbgMetadata(MDNode *Node) { - DbgLoc = DebugLoc::getFromDILocation(Node); -} - /// setMetadata - Set the metadata of the specified kind to the specified /// node. This updates/replaces metadata if already present, or removes it if /// Node is null. @@ -567,13 +555,11 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned, } -/// removeAllMetadata - Remove all metadata from this instruction. -void Instruction::removeAllMetadata() { - assert(hasMetadata() && "Caller should check"); - DbgLoc = DebugLoc(); - if (hasMetadataHashEntry()) { - getContext().pImpl->MetadataStore.erase(this); - setHasMetadataHashEntry(false); - } +/// clearMetadataHashEntries - Clear all hashtable-based metadata from +/// this instruction. +void Instruction::clearMetadataHashEntries() { + assert(hasMetadataHashEntry() && "Caller should check"); + getContext().pImpl->MetadataStore.erase(this); + setHasMetadataHashEntry(false); } diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 38a51dfd5d388..d7ddf96cb0700 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -58,10 +58,10 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>; // Module::Module(StringRef MID, LLVMContext& C) - : Context(C), Materializer(NULL), ModuleID(MID), DataLayout("") { + : Context(C), Materializer(NULL), ModuleID(MID) { ValSymTab = new ValueSymbolTable(); TypeSymTab = new TypeSymbolTable(); - NamedMDSymTab = new MDSymbolTable(); + NamedMDSymTab = new StringMap<NamedMDNode *>(); } Module::~Module() { @@ -73,7 +73,7 @@ Module::~Module() { NamedMDList.clear(); delete ValSymTab; delete TypeSymTab; - delete NamedMDSymTab; + delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab); } /// Target endian information... @@ -316,19 +316,28 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const { NamedMDNode *Module::getNamedMetadata(const Twine &Name) const { SmallString<256> NameData; StringRef NameRef = Name.toStringRef(NameData); - return NamedMDSymTab->lookup(NameRef); + return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef); } /// getOrInsertNamedMetadata - Return the first named MDNode in the module /// with the specified name. This method returns a new NamedMDNode if a /// NamedMDNode with the specified name is not found. NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) { - NamedMDNode *NMD = NamedMDSymTab->lookup(Name); - if (!NMD) - NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this); + NamedMDNode *&NMD = + (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name]; + if (!NMD) { + NMD = new NamedMDNode(Name); + NMD->setParent(this); + NamedMDList.push_back(NMD); + } return NMD; } +void Module::eraseNamedMetadata(NamedMDNode *NMD) { + static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName()); + NamedMDList.erase(NMD); +} + //===----------------------------------------------------------------------===// // Methods for easy access to the types in the module.
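// The get-or-insert shape used by getOrInsertNamedMetadata above, reduced to
// a standalone sketch (std::map standing in for the StringMap; the types are
// hypothetical): operator[] yields a reference to the slot, so one lookup
// serves both the hit and the miss path.
#include <map>
#include <string>

struct Named {
  std::string Name;
  Named(const std::string &N) : Name(N) {}
};

Named *getOrInsert(std::map<std::string, Named*> &Table,
                   const std::string &Name) {
  Named *&Slot = Table[Name];  // default-constructs a null slot on a miss
  if (!Slot)
    Slot = new Named(Name);    // created exactly once per name
  return Slot;
}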
// diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index efd98af0f443c..a7d7f61dd7622 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -14,35 +14,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Pass.h" -#include "llvm/PassManager.h" -#include "llvm/Module.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/PassRegistry.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PassNameParser.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Atomic.h" -#include "llvm/System/Mutex.h" -#include "llvm/System/Threading.h" -#include <algorithm> -#include <map> -#include <set> using namespace llvm; //===----------------------------------------------------------------------===// // Pass Implementation // -Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) { - assert(pid && "pid cannot be 0"); -} - -Pass::Pass(PassKind K, const void *pid) - : Resolver(0), PassID((intptr_t)pid), Kind(K) { - assert(pid && "pid cannot be 0"); -} +Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { } // Force out-of-line virtual method. Pass::~Pass() { @@ -61,8 +44,8 @@ PassManagerType ModulePass::getPotentialPassManagerType() const { return PMT_ModulePassManager; } -bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const { - return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0; +bool Pass::mustPreserveAnalysisID(char &AID) const { + return Resolver->getAnalysisIfAvailable(&AID, true) != 0; } // dumpPassStructure - Implement the -debug-passes=Structure option @@ -75,7 +58,9 @@ void Pass::dumpPassStructure(unsigned Offset) { /// Registration templates, but can be overloaded directly. /// const char *Pass::getPassName() const { - if (const PassInfo *PI = getPassInfo()) + AnalysisID AID = getPassID(); + const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID); + if (PI) return PI->getPassName(); return "Unnamed pass: implement Pass::getPassName()"; } @@ -101,7 +86,7 @@ void Pass::verifyAnalysis() const { // By default, don't do anything. } -void *Pass::getAdjustedAnalysisPointer(const PassInfo *) { +void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) { return this; } @@ -150,30 +135,6 @@ Pass *FunctionPass::createPrinterPass(raw_ostream &O, return createPrintFunctionPass(Banner, &O); } -// run - On a module, we run this pass by initializing, runOnFunction'ing once -// for every function in the module, then by finalizing. -// -bool FunctionPass::runOnModule(Module &M) { - bool Changed = doInitialization(M); - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration()) // Passes are not run on external functions! - Changed |= runOnFunction(*I); - - return Changed | doFinalization(M); -} - -// run - On a function, we simply initialize, run the function, then finalize. -// -bool FunctionPass::run(Function &F) { - // Passes are not run on external functions! - if (F.isDeclaration()) return false; - - bool Changed = doInitialization(*F.getParent()); - Changed |= runOnFunction(F); - return Changed | doFinalization(*F.getParent()); -} - bool FunctionPass::doInitialization(Module &) { // By default, don't do anything. return false; @@ -199,16 +160,6 @@ Pass *BasicBlockPass::createPrinterPass(raw_ostream &O, return 0; } -// To run this pass on a function, we simply call runOnBasicBlock once for each -// function. 
-// -bool BasicBlockPass::runOnFunction(Function &F) { - bool Changed = doInitialization(F); - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - Changed |= runOnBasicBlock(*I); - return Changed | doFinalization(F); -} - bool BasicBlockPass::doInitialization(Module &) { // By default, don't do anything. return false; @@ -233,161 +184,12 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const { return PMT_BasicBlockPassManager; } -//===----------------------------------------------------------------------===// -// Pass Registration mechanism -// -namespace { -class PassRegistrar { - /// Guards the contents of this class. - mutable sys::SmartMutex<true> Lock; - - /// PassInfoMap - Keep track of the passinfo object for each registered llvm - /// pass. - typedef std::map<intptr_t, const PassInfo*> MapType; - MapType PassInfoMap; - - typedef StringMap<const PassInfo*> StringMapType; - StringMapType PassInfoStringMap; - - /// AnalysisGroupInfo - Keep track of information for each analysis group. - struct AnalysisGroupInfo { - std::set<const PassInfo *> Implementations; - }; - - /// AnalysisGroupInfoMap - Information for each analysis group. - std::map<const PassInfo *, AnalysisGroupInfo> AnalysisGroupInfoMap; - -public: - - const PassInfo *GetPassInfo(intptr_t TI) const { - sys::SmartScopedLock<true> Guard(Lock); - MapType::const_iterator I = PassInfoMap.find(TI); - return I != PassInfoMap.end() ? I->second : 0; - } - - const PassInfo *GetPassInfo(StringRef Arg) const { - sys::SmartScopedLock<true> Guard(Lock); - StringMapType::const_iterator I = PassInfoStringMap.find(Arg); - return I != PassInfoStringMap.end() ? I->second : 0; - } - - void RegisterPass(const PassInfo &PI) { - sys::SmartScopedLock<true> Guard(Lock); - bool Inserted = - PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; - assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted; - PassInfoStringMap[PI.getPassArgument()] = &PI; - } - - void UnregisterPass(const PassInfo &PI) { - sys::SmartScopedLock<true> Guard(Lock); - MapType::iterator I = PassInfoMap.find(PI.getTypeInfo()); - assert(I != PassInfoMap.end() && "Pass registered but not in map!"); - - // Remove pass from the map. - PassInfoMap.erase(I); - PassInfoStringMap.erase(PI.getPassArgument()); - } - - void EnumerateWith(PassRegistrationListener *L) { - sys::SmartScopedLock<true> Guard(Lock); - for (MapType::const_iterator I = PassInfoMap.begin(), - E = PassInfoMap.end(); I != E; ++I) - L->passEnumerate(I->second); - } - - - /// Analysis Group Mechanisms. 
- void RegisterAnalysisGroup(PassInfo *InterfaceInfo, - const PassInfo *ImplementationInfo, - bool isDefault) { - sys::SmartScopedLock<true> Guard(Lock); - AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; - assert(AGI.Implementations.count(ImplementationInfo) == 0 && - "Cannot add a pass to the same analysis group more than once!"); - AGI.Implementations.insert(ImplementationInfo); - if (isDefault) { - assert(InterfaceInfo->getNormalCtor() == 0 && - "Default implementation for analysis group already specified!"); - assert(ImplementationInfo->getNormalCtor() && - "Cannot specify pass as default if it does not have a default ctor"); - InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor()); - } - } -}; -} - -static std::vector<PassRegistrationListener*> *Listeners = 0; -static sys::SmartMutex<true> ListenersLock; - -static PassRegistrar *PassRegistrarObj = 0; -static PassRegistrar *getPassRegistrar() { - // Use double-checked locking to safely initialize the registrar when - // we're running in multithreaded mode. - PassRegistrar* tmp = PassRegistrarObj; - if (llvm_is_multithreaded()) { - sys::MemoryFence(); - if (!tmp) { - llvm_acquire_global_lock(); - tmp = PassRegistrarObj; - if (!tmp) { - tmp = new PassRegistrar(); - sys::MemoryFence(); - PassRegistrarObj = tmp; - } - llvm_release_global_lock(); - } - } else if (!tmp) { - PassRegistrarObj = new PassRegistrar(); - } - - return PassRegistrarObj; -} - -namespace { - -// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown. -// Unfortunately, passes are registered with static ctors, and having -// llvm_shutdown clear this map prevents successful ressurection after -// llvm_shutdown is run. Ideally we should find a solution so that we don't -// leak the map, AND can still resurrect after shutdown. -void cleanupPassRegistrar(void*) { - if (PassRegistrarObj) { - delete PassRegistrarObj; - PassRegistrarObj = 0; - } -} -ManagedCleanup<&cleanupPassRegistrar> registrarCleanup ATTRIBUTE_USED; - -} - -// getPassInfo - Return the PassInfo data structure that corresponds to this -// pass... -const PassInfo *Pass::getPassInfo() const { - return lookupPassInfo(PassID); -} - -const PassInfo *Pass::lookupPassInfo(intptr_t TI) { - return getPassRegistrar()->GetPassInfo(TI); +const PassInfo *Pass::lookupPassInfo(const void *TI) { + return PassRegistry::getPassRegistry()->getPassInfo(TI); } const PassInfo *Pass::lookupPassInfo(StringRef Arg) { - return getPassRegistrar()->GetPassInfo(Arg); -} - -void PassInfo::registerPass() { - getPassRegistrar()->RegisterPass(*this); - - // Notify any listeners. - sys::SmartScopedLock<true> Lock(ListenersLock); - if (Listeners) - for (std::vector<PassRegistrationListener*>::iterator - I = Listeners->begin(), E = Listeners->end(); I != E; ++I) - (*I)->passRegistered(this); -} - -void PassInfo::unregisterPass() { - getPassRegistrar()->UnregisterPass(*this); + return PassRegistry::getPassRegistry()->getPassInfo(Arg); } Pass *PassInfo::createPass() const { @@ -404,32 +206,11 @@ Pass *PassInfo::createPass() const { // RegisterAGBase implementation // -RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID, - intptr_t PassID, bool isDefault) - : PassInfo(Name, InterfaceID) { - - PassInfo *InterfaceInfo = - const_cast<PassInfo*>(Pass::lookupPassInfo(InterfaceID)); - if (InterfaceInfo == 0) { - // First reference to Interface, register it now. 
- registerPass(); - InterfaceInfo = this; - } - assert(isAnalysisGroup() && - "Trying to join an analysis group that is a normal pass!"); - - if (PassID) { - const PassInfo *ImplementationInfo = Pass::lookupPassInfo(PassID); - assert(ImplementationInfo && - "Must register pass before adding to AnalysisGroup!"); - - // Make sure we keep track of the fact that the implementation implements - // the interface. - PassInfo *IIPI = const_cast<PassInfo*>(ImplementationInfo); - IIPI->addInterfaceImplemented(InterfaceInfo); - - getPassRegistrar()->RegisterAnalysisGroup(InterfaceInfo, IIPI, isDefault); - } +RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID, + const void *PassID, bool isDefault) + : PassInfo(Name, InterfaceID) { + PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID, + *this, isDefault); } @@ -440,31 +221,19 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID, // PassRegistrationListener ctor - Add the current object to the list of // PassRegistrationListeners... PassRegistrationListener::PassRegistrationListener() { - sys::SmartScopedLock<true> Lock(ListenersLock); - if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>(); - Listeners->push_back(this); + PassRegistry::getPassRegistry()->addRegistrationListener(this); } // dtor - Remove object from list of listeners... PassRegistrationListener::~PassRegistrationListener() { - sys::SmartScopedLock<true> Lock(ListenersLock); - std::vector<PassRegistrationListener*>::iterator I = - std::find(Listeners->begin(), Listeners->end(), this); - assert(Listeners && I != Listeners->end() && - "PassRegistrationListener not registered!"); - Listeners->erase(I); - - if (Listeners->empty()) { - delete Listeners; - Listeners = 0; - } + PassRegistry::getPassRegistry()->removeRegistrationListener(this); } // enumeratePasses - Iterate over the registered passes, calling the // passEnumerate callback on each PassInfo object. // void PassRegistrationListener::enumeratePasses() { - getPassRegistrar()->EnumerateWith(this); + PassRegistry::getPassRegistry()->enumerateWith(this); } PassNameParser::~PassNameParser() {} @@ -481,7 +250,7 @@ namespace { void passEnumerate(const PassInfo *P) { if (P->isCFGOnlyPass()) - CFGOnlyList.push_back(P); + CFGOnlyList.push_back(P->getTypeInfo()); } }; } @@ -501,15 +270,25 @@ void AnalysisUsage::setPreservesCFG() { GetCFGOnlyPasses(Preserved).enumeratePasses(); } -AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) { - assert(ID && "Pass class not registered!"); - Required.push_back(ID); +AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) { + const PassInfo *PI = Pass::lookupPassInfo(Arg); + // If the pass exists, preserve it. Otherwise silently do nothing. 
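// A note on the raw IDs being stored here: getTypeInfo() returns the pass's
// AnalysisID, which under the new scheme is simply the address of a static
// char declared by each pass. The idiom in miniature (hypothetical passes):
#include <cassert>

struct PassA { static char ID; };
struct PassB { static char ID; };
char PassA::ID = 0;
char PassB::ID = 0;

int main() {
  const void *A = &PassA::ID;
  const void *B = &PassB::ID;
  assert(A != B);  // distinct statics give collision-free identities
  return 0;
}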
+ if (PI) Preserved.push_back(PI->getTypeInfo()); return *this; } -AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) { - assert(ID && "Pass class not registered!"); +AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) { Required.push_back(ID); - RequiredTransitive.push_back(ID); + return *this; +} + +AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) { + Required.push_back(&ID); + return *this; +} + +AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) { + Required.push_back(&ID); + RequiredTransitive.push_back(&ID); return *this; } diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 296b0d13a710b..ab4d4e55c7504 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This file implements the LLVM Pass Manager infrastructure. +// This file implements the LLVM Pass Manager infrastructure. // //===----------------------------------------------------------------------===// #include "llvm/PassManagers.h" +#include "llvm/PassManager.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" @@ -24,8 +25,6 @@ #include "llvm/Support/PassNameParser.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" -#include "llvm/System/Threading.h" -#include "llvm-c/Core.h" #include <algorithm> #include <cstdio> #include <map> @@ -82,30 +81,32 @@ PrintAfterAll("print-after-all", /// This is a helper to determine whether to print IR before or /// after a pass. -static bool ShouldPrintBeforeOrAfterPass(Pass *P, +static bool ShouldPrintBeforeOrAfterPass(const void *PassID, PassOptionList &PassesToPrint) { - for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) { - const llvm::PassInfo *PassInf = PassesToPrint[i]; - if (PassInf && P->getPassInfo()) - if (PassInf->getPassArgument() == - P->getPassInfo()->getPassArgument()) { - return true; - } + if (const llvm::PassInfo *PI = + PassRegistry::getPassRegistry()->getPassInfo(PassID)) { + for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) { + const llvm::PassInfo *PassInf = PassesToPrint[i]; + if (PassInf) + if (PassInf->getPassArgument() == PI->getPassArgument()) { + return true; + } + } } return false; } - + /// This is a utility to check whether a pass should have IR dumped /// before it. -static bool ShouldPrintBeforePass(Pass *P) { - return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(P, PrintBefore); +static bool ShouldPrintBeforePass(const void *PassID) { + return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore); } /// This is a utility to check whether a pass should have IR dumped /// after it. 
-static bool ShouldPrintAfterPass(Pass *P) {
-  return PrintAfterAll || ShouldPrintBeforeOrAfterPass(P, PrintAfter);
+static bool ShouldPrintAfterPass(const void *PassID) {
+  return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
 }
 
 } // End of llvm namespace
@@ -124,9 +125,9 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
     OS << "Releasing pass '";
   else
     OS << "Running pass '";
-  
+
   OS << P->getPassName() << "'";
-  
+
   if (M) {
     OS << " on module '" << M->getModuleIdentifier() << "'.\n";
     return;
@@ -162,8 +163,8 @@ class BBPassManager : public PMDataManager, public FunctionPass {
 
 public:
   static char ID;
-  explicit BBPassManager(int Depth) 
-    : PMDataManager(Depth), FunctionPass(&ID) {}
+  explicit BBPassManager(int Depth)
+    : PMDataManager(Depth), FunctionPass(ID) {}
 
   /// Execute all of the passes scheduled for execution. Keep track of
   /// whether any of the passes modifies the function, and if so, return true.
@@ -202,8 +203,8 @@ public:
     return BP;
   }
 
-  virtual PassManagerType getPassManagerType() const { 
-    return PMT_BasicBlockPassManager; 
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_BasicBlockPassManager;
   }
 };
 
@@ -223,9 +224,9 @@ private:
   bool wasRun;
 public:
   static char ID;
-  explicit FunctionPassManagerImpl(int Depth) : 
-    Pass(PT_PassManager, &ID), PMDataManager(Depth), 
-    PMTopLevelManager(TLM_Function), wasRun(false) { }
+  explicit FunctionPassManagerImpl(int Depth) :
+    Pass(PT_PassManager, ID), PMDataManager(Depth),
+    PMTopLevelManager(new FPPassManager(1)), wasRun(false) {}
 
   /// add - Add a pass to the queue of passes to run. This passes ownership of
   /// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -234,8 +235,8 @@ public:
   void add(Pass *P) {
     schedulePass(P);
   }
- 
-  /// createPrinterPass - Get a function printer pass. 
+
+  /// createPrinterPass - Get a function printer pass.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
     return createPrintFunctionPass(Banner, &O);
   }
@@ -251,12 +252,12 @@ public:
   /// doInitialization - Run all of the initializers for the function passes.
   ///
   bool doInitialization(Module &M);
-  
+
   /// doFinalization - Run all of the finalizers for the function passes.
   ///
   bool doFinalization(Module &M);
-  
+
   virtual PMDataManager *getAsPMDataManager() { return this; }
   virtual Pass *getAsPass() { return this; }
 
@@ -265,7 +266,7 @@ public:
     Info.setPreservesAll();
   }
 
-  inline void addTopLevelPass(Pass *P) {
+  void addTopLevelPass(Pass *P) {
     if (ImmutablePass *IP = P->getAsImmutablePass()) {
       // P is an immutable pass and it will be managed by this
       // top level manager. Set up analysis resolver to connect them.
@@ -288,6 +289,7 @@ public:
 };
 
 char FunctionPassManagerImpl::ID = 0;
+
 //===----------------------------------------------------------------------===//
 // MPPassManager
 //
@@ -298,11 +300,11 @@ class MPPassManager : public Pass, public PMDataManager {
 public:
   static char ID;
   explicit MPPassManager(int Depth) :
-    Pass(PT_PassManager, &ID), PMDataManager(Depth) { }
+    Pass(PT_PassManager, ID), PMDataManager(Depth) { }
 
   // Delete on the fly managers.
   virtual ~MPPassManager() {
-    for (std::map<Pass *, FunctionPassManagerImpl *>::iterator 
+    for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
            I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
          I != E; ++I) {
       FunctionPassManagerImpl *FPP = I->second;
@@ -310,7 +312,7 @@ public:
     }
   }
 
-  /// createPrinterPass - Get a module printer pass. 
+  /// createPrinterPass - Get a module printer pass.
  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
    return createPrintModulePass(&O, false, Banner);
  }
@@ -329,10 +331,10 @@ public:
   /// through getAnalysis interface.
   virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
 
-  /// Return function pass corresponding to PassInfo PI, that is 
+  /// Return function pass corresponding to PassInfo PI, that is
   /// required by module pass MP. Instantiate analysis pass, by using
   /// its runOnFunction() for function F.
-  virtual Pass* getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F);
+  virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
 
   virtual const char *getPassName() const {
     return "Module Pass Manager";
@@ -360,8 +362,8 @@ public:
     return static_cast<ModulePass *>(PassVector[N]);
   }
 
-  virtual PassManagerType getPassManagerType() const { 
-    return PMT_ModulePassManager; 
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_ModulePassManager;
   }
 
 private:
@@ -383,8 +385,8 @@ class PassManagerImpl : public Pass,
 public:
   static char ID;
   explicit PassManagerImpl(int Depth) :
-    Pass(PT_PassManager, &ID), PMDataManager(Depth),
-    PMTopLevelManager(TLM_Pass) { }
+    Pass(PT_PassManager, ID), PMDataManager(Depth),
+    PMTopLevelManager(new MPPassManager(1)) {}
 
   /// add - Add a pass to the queue of passes to run. This passes ownership of
   /// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -393,8 +395,8 @@ public:
   void add(Pass *P) {
     schedulePass(P);
   }
- 
-  /// createPrinterPass - Get a module printer pass. 
+
+  /// createPrinterPass - Get a module printer pass.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
     return createPrintModulePass(&O, false, Banner);
   }
@@ -408,7 +410,7 @@ public:
     Info.setPreservesAll();
   }
 
-  inline void addTopLevelPass(Pass *P) {
+  void addTopLevelPass(Pass *P) {
     if (ImmutablePass *IP = P->getAsImmutablePass()) {
       // P is an immutable pass and it will be managed by this
       // top level manager. Set up analysis resolver to connect them.
@@ -451,7 +453,7 @@ class TimingInfo {
 public:
   // Use 'create' member to get this.
   TimingInfo() : TG("... Pass execution timing report ...") {}
-  
+
   // TimingDtor - Print out information about timing information
   ~TimingInfo() {
     // Delete all of the timers, which accumulate their info into the
@@ -469,7 +471,7 @@ public:
   /// getPassTimer - Return the timer for the specified pass if it exists.
   Timer *getPassTimer(Pass *P) {
-    if (P->getAsPMDataManager()) 
+    if (P->getAsPMDataManager())
       return 0;
 
     sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
@@ -488,28 +490,20 @@ static TimingInfo *TheTimeInfo;
 // PMTopLevelManager implementation
 
 /// Initialize top level manager. Create first pass manager.
-PMTopLevelManager::PMTopLevelManager(enum TopLevelManagerType t) {
-  if (t == TLM_Pass) {
-    MPPassManager *MPP = new MPPassManager(1);
-    MPP->setTopLevelManager(this);
-    addPassManager(MPP);
-    activeStack.push(MPP);
-  } else if (t == TLM_Function) {
-    FPPassManager *FPP = new FPPassManager(1);
-    FPP->setTopLevelManager(this);
-    addPassManager(FPP);
-    activeStack.push(FPP);
-  }
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+  PMDM->setTopLevelManager(this);
+  addPassManager(PMDM);
+  activeStack.push(PMDM);
 }
 
 /// Set pass P as the last user of the given analysis passes.
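+/// For example, if P is the last pass that requires a DominatorTree, the tree
+/// can be freed as soon as P has run (the analysis is chosen for illustration).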
-void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses, 
+void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
                                     Pass *P) {
   for (SmallVector<Pass *, 12>::iterator I = AnalysisPasses.begin(),
          E = AnalysisPasses.end(); I != E; ++I) {
     Pass *AP = *I;
     LastUser[AP] = P;
-    
+
     if (P == AP)
       continue;
@@ -528,7 +522,7 @@ void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
 /// Collect passes whose last user is P
 void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
                                         Pass *P) {
-  DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI = 
+  DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
     InversedLastUser.find(P);
   if (DMI == InversedLastUser.end())
     return;
@@ -544,7 +538,7 @@ void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
 AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
   AnalysisUsage *AnUsage = NULL;
   DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
-  if (DMI != AnUsageMap.end()) 
+  if (DMI != AnUsageMap.end())
     AnUsage = DMI->second;
   else {
     AnUsage = new AnalysisUsage();
@@ -568,8 +562,9 @@ void PMTopLevelManager::schedulePass(Pass *P) {
   // If P is an analysis pass and it is available then do not
   // generate the analysis again. Stale analysis info should not be
   // available at this point.
-  if (P->getPassInfo() &&
-      P->getPassInfo()->isAnalysis() && findAnalysisPass(P->getPassInfo())) {
+  const PassInfo *PI =
+    PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+  if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
     delete P;
     return;
   }
@@ -579,14 +574,15 @@ void PMTopLevelManager::schedulePass(Pass *P) {
   bool checkAnalysis = true;
   while (checkAnalysis) {
     checkAnalysis = false;
-  
+
     const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
     for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
            E = RequiredSet.end(); I != E; ++I) {
-      
+
       Pass *AnalysisPass = findAnalysisPass(*I);
       if (!AnalysisPass) {
-        AnalysisPass = (*I)->createPass();
+        const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+        AnalysisPass = PI->createPass();
         if (P->getPotentialPassManagerType () ==
             AnalysisPass->getPotentialPassManagerType())
           // Schedule analysis pass that is managed by the same pass manager.
@@ -595,12 +591,12 @@ void PMTopLevelManager::schedulePass(Pass *P) {
                  AnalysisPass->getPotentialPassManagerType()) {
           // Schedule analysis pass that is managed by a new manager.
           schedulePass(AnalysisPass);
-          // Recheck analysis passes to ensure that required analysises that
+          // Recheck analysis passes to ensure that required analyses that
           // are already checked are still available.
           checkAnalysis = true;
         } else
-          // Do not schedule this analysis. Lower level analsyis 
+          // Do not schedule this analysis. Lower level analysis
          // passes are run on the fly.
          delete AnalysisPass;
      }
    }
@@ -632,16 +628,21 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
 
   for (SmallVector<ImmutablePass *, 8>::iterator I = ImmutablePasses.begin(),
          E = ImmutablePasses.end(); P == NULL && I != E; ++I) {
-    const PassInfo *PI = (*I)->getPassInfo();
+    AnalysisID PI = (*I)->getPassID();
     if (PI == AID)
       P = *I;
 
     // If Pass not found then check the interfaces implemented by Immutable Pass
     if (!P) {
+      const PassInfo *PassInf =
+        PassRegistry::getPassRegistry()->getPassInfo(PI);
       const std::vector<const PassInfo*> &ImmPI =
-        PI->getInterfacesImplemented();
-      if (std::find(ImmPI.begin(), ImmPI.end(), AID) != ImmPI.end())
-        P = *I;
+        PassInf->getInterfacesImplemented();
+      for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+           EE = ImmPI.end(); II != EE; ++II) {
+        if ((*II)->getTypeInfo() == AID)
+          P = *I;
+      }
     }
   }
 
@@ -658,7 +659,7 @@ void PMTopLevelManager::dumpPasses() const {
   for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
     ImmutablePasses[i]->dumpPassStructure(0);
   }
-  
+
   // Every class that derives from PMDataManager also derives from Pass
   // (sometimes indirectly), but there's no inheritance relationship
   // between PMDataManager and Pass, so we have to getAsPass to get
@@ -684,15 +685,16 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
   for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
          E = PassManagers.end(); I != E; ++I)
     (*I)->initializeAnalysisInfo();
-  
+
   // Initialize other pass managers
-  for (SmallVector<PMDataManager *, 8>::iterator I = IndirectPassManagers.begin(),
-         E = IndirectPassManagers.end(); I != E; ++I)
+  for (SmallVector<PMDataManager *, 8>::iterator
+       I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+       I != E; ++I)
     (*I)->initializeAnalysisInfo();
 
   for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
          DME = LastUser.end(); DMI != DME; ++DMI) {
-    DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI = 
+    DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
       InversedLastUser.find(DMI->second);
     if (InvDMI != InversedLastUser.end()) {
       SmallPtrSet<Pass *, 8> &L = InvDMI->second;
@@ -709,7 +711,7 @@ PMTopLevelManager::~PMTopLevelManager() {
   for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
          E = PassManagers.end(); I != E; ++I)
     delete *I;
-  
+
   for (SmallVector<ImmutablePass *, 8>::iterator
          I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
     delete *I;
@@ -724,16 +726,19 @@ PMTopLevelManager::~PMTopLevelManager() {
 
 /// Augment AvailableAnalysis by adding analysis made available by pass P.
 void PMDataManager::recordAvailableAnalysis(Pass *P) {
-  const PassInfo *PI = P->getPassInfo();
-  if (PI == 0) return;
-  
+  AnalysisID PI = P->getPassID();
+
   AvailableAnalysis[PI] = P;
 
-  //This pass is the current implementation of all of the interfaces it
-  //implements as well.
-  const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+  assert(!AvailableAnalysis.empty());
+
+  // This pass is the current implementation of all of the interfaces it
+  // implements as well.
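+  // E.g. when an AliasAnalysis implementation runs, it also becomes the
+  // recorded provider for the AliasAnalysis group's interface ID (the group
+  // named here is illustrative).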
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); + if (PInf == 0) return; + const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) - AvailableAnalysis[II[i]] = P; + AvailableAnalysis[II[i]->getTypeInfo()] = P; } // Return true if P preserves high level analysis used by other @@ -742,18 +747,18 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) { AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); if (AnUsage->getPreservesAll()) return true; - + const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); for (SmallVector<Pass *, 8>::iterator I = HigherLevelAnalysis.begin(), E = HigherLevelAnalysis.end(); I != E; ++I) { Pass *P1 = *I; if (P1->getAsImmutablePass() == 0 && std::find(PreservedSet.begin(), PreservedSet.end(), - P1->getPassInfo()) == + P1->getPassID()) == PreservedSet.end()) return false; } - + return true; } @@ -788,7 +793,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { E = AvailableAnalysis.end(); I != E; ) { std::map<AnalysisID, Pass*>::iterator Info = I++; if (Info->second->getAsImmutablePass() == 0 && - std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == + std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == PreservedSet.end()) { // Remove this analysis if (PassDebugging >= Details) { @@ -807,12 +812,12 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { if (!InheritedAnalysis[Index]) continue; - for (std::map<AnalysisID, Pass*>::iterator + for (std::map<AnalysisID, Pass*>::iterator I = InheritedAnalysis[Index]->begin(), E = InheritedAnalysis[Index]->end(); I != E; ) { std::map<AnalysisID, Pass *>::iterator Info = I++; if (Info->second->getAsImmutablePass() == 0 && - std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == + std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == PreservedSet.end()) { // Remove this analysis if (PassDebugging >= Details) { @@ -861,23 +866,24 @@ void PMDataManager::freePass(Pass *P, StringRef Msg, P->releaseMemory(); } - if (const PassInfo *PI = P->getPassInfo()) { + AnalysisID PI = P->getPassID(); + if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) { // Remove the pass itself (if it is not already removed). AvailableAnalysis.erase(PI); // Remove all interfaces this pass implements, for which it is also // listed as the available implementation. - const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented(); + const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) { std::map<AnalysisID, Pass*>::iterator Pos = - AvailableAnalysis.find(II[i]); + AvailableAnalysis.find(II[i]->getTypeInfo()); if (Pos != AvailableAnalysis.end() && Pos->second == P) AvailableAnalysis.erase(Pos); } } } -/// Add pass P into the PassVector. Update +/// Add pass P into the PassVector. Update /// AvailableAnalysis appropriately if ProcessAnalysis is true. void PMDataManager::add(Pass *P, bool ProcessAnalysis) { // This manager is going to manage pass P. 
Set up analysis resolver
@@ -902,7 +908,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
 
   unsigned PDepth = this->getDepth();
 
-  collectRequiredAnalysis(RequiredPasses, 
+  collectRequiredAnalysis(RequiredPasses,
                           ReqAnalysisNotAvailable, P);
   for (SmallVector<Pass *, 8>::iterator I = RequiredPasses.begin(),
          E = RequiredPasses.end(); I != E; ++I) {
@@ -920,7 +926,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
         TransferLastUses.push_back(PRequired);
         // Keep track of higher level analysis used by this manager.
         HigherLevelAnalysis.push_back(PRequired);
-      } else 
+      } else
         llvm_unreachable("Unable to accommodate Required Pass");
     }
 
@@ -937,11 +943,12 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
     TransferLastUses.clear();
   }
 
-  // Now, take care of required analysises that are not available.
-  for (SmallVector<AnalysisID, 8>::iterator 
-         I = ReqAnalysisNotAvailable.begin(),
+  // Now, take care of required analyses that are not available.
+  for (SmallVector<AnalysisID, 8>::iterator
+         I = ReqAnalysisNotAvailable.begin(),
          E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
-    Pass *AnalysisPass = (*I)->createPass();
+    const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+    Pass *AnalysisPass = PI->createPass();
     this->addLowerLevelRequiredPass(P, AnalysisPass);
   }
 
@@ -963,10 +970,10 @@ void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
                                             Pass *P) {
   AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
   const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
-  for (AnalysisUsage::VectorType::const_iterator 
+  for (AnalysisUsage::VectorType::const_iterator
          I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
     if (Pass *AnalysisPass = findAnalysisPass(*I, true))
-      RP.push_back(AnalysisPass); 
+      RP.push_back(AnalysisPass);
     else
       RP_NotAvail.push_back(*I);
   }
@@ -975,7 +982,7 @@ void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
   for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
          E = IDs.end(); I != E; ++I) {
     if (Pass *AnalysisPass = findAnalysisPass(*I, true))
-      RP.push_back(AnalysisPass); 
+      RP.push_back(AnalysisPass);
     else
       RP_NotAvail.push_back(*I);
   }
@@ -1016,7 +1023,7 @@ Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
   // Search Parents through TopLevelManager
   if (SearchParent)
     return TPM->findAnalysisPass(AID);
-  
+
   return NULL;
 }
 
@@ -1030,7 +1037,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
     return;
 
   TPM->collectLastUses(LUses, P);
-  
+
   for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
          E = LUses.end(); I != E; ++I) {
     llvm::dbgs() << "--" << std::string(Offset*2, ' ');
@@ -1044,7 +1051,8 @@ void PMDataManager::dumpPassArguments() const {
     if (PMDataManager *PMD = (*I)->getAsPMDataManager())
       PMD->dumpPassArguments();
    else
-      if (const PassInfo *PI = (*I)->getPassInfo())
+      if (const PassInfo *PI =
+            PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
        if (!PI->isAnalysisGroup())
          dbgs() << " -" << PI->getPassArgument();
  }
@@ -1093,7 +1101,7 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
 
 void PMDataManager::dumpRequiredSet(const Pass *P) const {
   if (PassDebugging < Details)
     return;
-    
+
   AnalysisUsage analysisUsage;
   P->getAnalysisUsage(analysisUsage);
   dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
@@ -1102,7 +1110,7 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
   if (PassDebugging < Details)
     return;
-    
+
  AnalysisUsage analysisUsage;
   P->getAnalysisUsage(analysisUsage);
   dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
@@ -1116,7 +1124,8 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
   dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
   for (unsigned i = 0; i != Set.size(); ++i) {
     if (i) dbgs() << ',';
-    dbgs() << ' ' << Set[i]->getPassName();
+    const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+    dbgs() << ' ' << PInf->getPassName();
   }
   dbgs() << '\n';
 }
@@ -1131,14 +1140,14 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
     TPM->dumpPasses();
   }
 
-  // Module Level pass may required Function Level analysis info 
-  // (e.g. dominator info). Pass manager uses on the fly function pass manager 
-  // to provide this on demand. In that case, in Pass manager terminology, 
+  // Module Level pass may require Function Level analysis info
+  // (e.g. dominator info). Pass manager uses on the fly function pass manager
+  // to provide this on demand. In that case, in Pass manager terminology,
   // module level pass is requiring lower level analysis info managed by
   // lower level pass manager.
 
   // When Pass manager is not able to order required analysis info, Pass manager
-  // checks whether any lower level manager will be able to provide this 
+  // checks whether any lower level manager will be able to provide this
   // analysis info on demand or not.
 #ifndef NDEBUG
   dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
@@ -1147,7 +1156,7 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
   llvm_unreachable("Unable to schedule pass");
 }
 
-Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
   assert(0 && "Unable to find on the fly pass");
   return NULL;
 }
 
@@ -1166,7 +1175,7 @@ Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
   return PM.findAnalysisPass(ID, dir);
 }
 
-Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
                                      Function &F) {
   return PM.getOnTheFlyPass(P, AnalysisPI, F);
 }
 
@@ -1174,8 +1183,8 @@ Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
 //===----------------------------------------------------------------------===//
 // BBPassManager implementation
 
-/// Execute all of the passes scheduled for execution by invoking 
-/// runOnBasicBlock method. Keep track of whether any of the passes modifies 
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
 /// the function, and if so, return true.
 bool BBPassManager::runOnFunction(Function &F) {
   if (F.isDeclaration())
@@ -1202,7 +1211,7 @@ bool BBPassManager::runOnFunction(Function &F) {
       }
 
       Changed |= LocalChanged;
-      if (LocalChanged) 
+      if (LocalChanged)
        dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
                     I->getName());
      dumpPreservedSet(BP);
@@ -1286,17 +1295,18 @@ void FunctionPassManager::addImpl(Pass *P) {
 /// PassManager_X is destroyed, the pass will be destroyed as well, so
 /// there is no need to delete the pass. (TODO delete passes.)
 /// This implies that all passes MUST be allocated with 'new'.
-void FunctionPassManager::add(Pass *P) { 
+void FunctionPassManager::add(Pass *P) {
   // If this is not a function pass, don't add a printer for it.
+ const void *PassID = P->getPassID(); if (P->getPassKind() == PT_Function) - if (ShouldPrintBeforePass(P)) + if (ShouldPrintBeforePass(PassID)) addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***")); addImpl(P); if (P->getPassKind() == PT_Function) - if (ShouldPrintAfterPass(P)) + if (ShouldPrintAfterPass(PassID)) addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***")); } @@ -1405,8 +1415,8 @@ void FPPassManager::dumpPassStructure(unsigned Offset) { } -/// Execute all of the passes scheduled for execution by invoking -/// runOnFunction method. Keep track of whether any of the passes modifies +/// Execute all of the passes scheduled for execution by invoking +/// runOnFunction method. Keep track of whether any of the passes modifies /// the function, and if so, return true. bool FPPassManager::runOnFunction(Function &F) { if (F.isDeclaration()) @@ -1476,8 +1486,8 @@ bool FPPassManager::doFinalization(Module &M) { //===----------------------------------------------------------------------===// // MPPassManager implementation -/// Execute all of the passes scheduled for execution by invoking -/// runOnModule method. Keep track of whether any of the passes modifies +/// Execute all of the passes scheduled for execution by invoking +/// runOnModule method. Keep track of whether any of the passes modifies /// the module, and if so, return true. bool MPPassManager::runOnModule(Module &M) { @@ -1512,7 +1522,7 @@ MPPassManager::runOnModule(Module &M) { dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); dumpPreservedSet(MP); - + verifyPreservedAnalysis(MP); removeNotPreservedAnalysis(MP); recordAvailableAnalysis(MP); @@ -1538,7 +1548,7 @@ MPPassManager::runOnModule(Module &M) { void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { assert(P->getPotentialPassManagerType() == PMT_ModulePassManager && "Unable to handle Pass that requires lower level Analysis pass"); - assert((P->getPotentialPassManagerType() < + assert((P->getPotentialPassManagerType() < RequiredPass->getPotentialPassManagerType()) && "Unable to handle Pass that requires lower level Analysis pass"); @@ -1558,13 +1568,13 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { FPP->setLastUser(LU, P); } -/// Return function pass corresponding to PassInfo PI, that is +/// Return function pass corresponding to PassInfo PI, that is /// required by module pass MP. Instantiate analysis pass, by using /// its runOnFunction() for function F. -Pass* MPPassManager::getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F){ +Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){ FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP]; assert(FPP && "Unable to find on the fly pass"); - + FPP->releaseMemoryOnTheFly(); FPP->run(F); return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI); @@ -1614,13 +1624,14 @@ void PassManager::addImpl(Pass *P) { /// will be destroyed as well, so there is no need to delete the pass. This /// implies that all passes MUST be allocated with 'new'. 
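+/// A minimal client sketch (the verifier is just an illustrative pass):
+///   PassManager PM;
+///   PM.add(createVerifierPass());
+///   PM.run(M);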
void PassManager::add(Pass *P) { - if (ShouldPrintBeforePass(P)) + const void* PassID = P->getPassID(); + if (ShouldPrintBeforePass(PassID)) addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***")); addImpl(P); - if (ShouldPrintAfterPass(P)) + if (ShouldPrintAfterPass(PassID)) addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***")); } @@ -1656,7 +1667,7 @@ void TimingInfo::createTheTimeInfo() { /// If TimingInfo is enabled then start pass timer. Timer *llvm::getPassTimer(Pass *P) { - if (TheTimeInfo) + if (TheTimeInfo) return TheTimeInfo->getPassTimer(P); return 0; } @@ -1690,8 +1701,8 @@ void PMStack::push(PMDataManager *PM) { } // Dump content of the pass manager stack. -void PMStack::dump() { - for (std::deque<PMDataManager *>::iterator I = S.begin(), +void PMStack::dump() const { + for (std::vector<PMDataManager *>::const_iterator I = S.begin(), E = S.end(); I != E; ++I) printf("%s ", (*I)->getAsPass()->getPassName()); @@ -1700,11 +1711,11 @@ void PMStack::dump() { } /// Find appropriate Module Pass Manager in the PM Stack and -/// add self into that manager. -void ModulePass::assignPassManager(PMStack &PMS, +/// add self into that manager. +void ModulePass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { // Find Module Pass Manager - while(!PMS.empty()) { + while (!PMS.empty()) { PassManagerType TopPMType = PMS.top()->getPassManagerType(); if (TopPMType == PreferredType) break; // We found desired pass manager @@ -1718,7 +1729,7 @@ void ModulePass::assignPassManager(PMStack &PMS, } /// Find appropriate Function Pass Manager or Call Graph Pass Manager -/// in the PM Stack and add self into that manager. +/// in the PM Stack and add self into that manager. void FunctionPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { @@ -1727,7 +1738,7 @@ void FunctionPass::assignPassManager(PMStack &PMS, if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager) PMS.pop(); else - break; + break; } // Create new Function Pass Manager if needed. @@ -1759,14 +1770,14 @@ void FunctionPass::assignPassManager(PMStack &PMS, } /// Find appropriate Basic Pass Manager or Call Graph Pass Manager -/// in the PM Stack and add self into that manager. +/// in the PM Stack and add self into that manager. void BasicBlockPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { BBPassManager *BBP; // Basic Pass Manager is a leaf pass manager. It does not handle // any other pass manager. 
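+  // Consequently, unlike FunctionPass::assignPassManager above, nothing is
+  // ever popped here: we either reuse a BBPassManager already on top of the
+  // stack or push a fresh one.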
-  if (!PMS.empty() && 
+  if (!PMS.empty() &&
       PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
     BBP = (BBPassManager *)PMS.top();
   } else {
@@ -1796,38 +1807,3 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
 }
 
 PassManagerBase::~PassManagerBase() {}
-
-/*===-- C Bindings --------------------------------------------------------===*/
-
-LLVMPassManagerRef LLVMCreatePassManager() {
-  return wrap(new PassManager());
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
-  return wrap(new FunctionPassManager(unwrap(M)));
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
-  return LLVMCreateFunctionPassManagerForModule(
-                                            reinterpret_cast<LLVMModuleRef>(P));
-}
-
-LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
-  return unwrap<PassManager>(PM)->run(*unwrap(M));
-}
-
-LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
-  return unwrap<FunctionPassManager>(FPM)->doInitialization();
-}
-
-LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
-  return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
-}
-
-LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
-  return unwrap<FunctionPassManager>(FPM)->doFinalization();
-}
-
-void LLVMDisposePassManager(LLVMPassManagerRef PM) {
-  delete unwrap(PM);
-}
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
new file mode 100644
index 0000000000000..21dba56aad728
--- /dev/null
+++ b/lib/VMCore/PassRegistry.cpp
@@ -0,0 +1,159 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+static PassRegistry *PassRegistryObj = 0;
+PassRegistry *PassRegistry::getPassRegistry() {
+  // Use double-checked locking to safely initialize the registrar when
+  // we're running in multithreaded mode.
+  PassRegistry* tmp = PassRegistryObj;
+  if (llvm_is_multithreaded()) {
+    sys::MemoryFence();
+    if (!tmp) {
+      llvm_acquire_global_lock();
+      tmp = PassRegistryObj;
+      if (!tmp) {
+        tmp = new PassRegistry();
+        sys::MemoryFence();
+        PassRegistryObj = tmp;
+      }
+      llvm_release_global_lock();
+    }
+  } else if (!tmp) {
+    PassRegistryObj = new PassRegistry();
+  }
+
+  return PassRegistryObj;
+}
+
+namespace {
+
+// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful resurrection after
+// llvm_shutdown is run.  Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
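+//
+// The resurrection scenario in question is roughly (sketch only):
+//   llvm_shutdown();                 // tears down ManagedStatic objects
+//   PassRegistry::getPassRegistry(); // must still return a usable registry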
+void cleanupPassRegistry(void*) {
+  if (PassRegistryObj) {
+    delete PassRegistryObj;
+    PassRegistryObj = 0;
+  }
+}
+ManagedCleanup<&cleanupPassRegistry> registryCleanup ATTRIBUTE_USED;
+
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+  sys::SmartScopedLock<true> Guard(Lock);
+  MapType::const_iterator I = PassInfoMap.find(TI);
+  return I != PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+  sys::SmartScopedLock<true> Guard(Lock);
+  StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
+  return I != PassInfoStringMap.end() ? I->second : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI) {
+  sys::SmartScopedLock<true> Guard(Lock);
+  bool Inserted =
+    PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+  assert(Inserted && "Pass registered multiple times!");
+  (void)Inserted; // Silence the unused-variable warning in NDEBUG builds.
+  PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+  // Notify any listeners.
+  for (std::vector<PassRegistrationListener*>::iterator
+       I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
+    (*I)->passRegistered(&PI);
+}
+
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+  sys::SmartScopedLock<true> Guard(Lock);
+  MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
+  assert(I != PassInfoMap.end() && "Pass registered but not in map!");
+
+  // Remove pass from the map.
+  PassInfoMap.erase(I);
+  PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+  sys::SmartScopedLock<true> Guard(Lock);
+  for (MapType::const_iterator I = PassInfoMap.begin(),
+       E = PassInfoMap.end(); I != E; ++I)
+    L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+                                         const void *PassID,
+                                         PassInfo& Registeree,
+                                         bool isDefault) {
+  PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+  if (InterfaceInfo == 0) {
+    // First reference to Interface, register it now.
+    registerPass(Registeree);
+    InterfaceInfo = &Registeree;
+  }
+  assert(Registeree.isAnalysisGroup() &&
+         "Trying to join an analysis group that is a normal pass!");
+
+  if (PassID) {
+    PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+    assert(ImplementationInfo &&
+           "Must register pass before adding to AnalysisGroup!");
+
+    // Make sure we keep track of the fact that the implementation implements
+    // the interface.
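+    // E.g. registering basicaa against the AliasAnalysis group records that
+    // basicaa can stand in wherever the group's interface is requested (the
+    // pass and group names are illustrative).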
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo); + + sys::SmartScopedLock<true> Guard(Lock); + AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; + assert(AGI.Implementations.count(ImplementationInfo) == 0 && + "Cannot add a pass to the same analysis group more than once!"); + AGI.Implementations.insert(ImplementationInfo); + if (isDefault) { + assert(InterfaceInfo->getNormalCtor() == 0 && + "Default implementation for analysis group already specified!"); + assert(ImplementationInfo->getNormalCtor() && + "Cannot specify pass as default if it does not have a default ctor"); + InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor()); + } + } +} + +void PassRegistry::addRegistrationListener(PassRegistrationListener *L) { + sys::SmartScopedLock<true> Guard(Lock); + Listeners.push_back(L); +} + +void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) { + sys::SmartScopedLock<true> Guard(Lock); + std::vector<PassRegistrationListener*>::iterator I = + std::find(Listeners.begin(), Listeners.end(), L); + assert(I != Listeners.end() && "PassRegistrationListener not registered!"); + Listeners.erase(I); +} diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp index 2d69dce07f3f7..2ee49d235963c 100644 --- a/lib/VMCore/PrintModulePass.cpp +++ b/lib/VMCore/PrintModulePass.cpp @@ -28,10 +28,10 @@ namespace { bool DeleteStream; // Delete the ostream in our dtor? public: static char ID; - PrintModulePass() : ModulePass(&ID), Out(&dbgs()), + PrintModulePass() : ModulePass(ID), Out(&dbgs()), DeleteStream(false) {} PrintModulePass(const std::string &B, raw_ostream *o, bool DS) - : ModulePass(&ID), Banner(B), Out(o), DeleteStream(DS) {} + : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {} ~PrintModulePass() { if (DeleteStream) delete Out; @@ -53,12 +53,12 @@ namespace { bool DeleteStream; // Delete the ostream in our dtor? public: static char ID; - PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&dbgs()), + PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()), DeleteStream(false) {} PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS) - : FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {} + : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {} - inline ~PrintFunctionPass() { + ~PrintFunctionPass() { if (DeleteStream) delete Out; } @@ -77,11 +77,11 @@ namespace { } char PrintModulePass::ID = 0; -static RegisterPass<PrintModulePass> -X("print-module", "Print module to stderr"); +INITIALIZE_PASS(PrintModulePass, "print-module", + "Print module to stderr", false, false); char PrintFunctionPass::ID = 0; -static RegisterPass<PrintFunctionPass> -Y("print-function","Print function to stderr"); +INITIALIZE_PASS(PrintFunctionPass, "print-function", + "Print function to stderr", false, false); /// createPrintModulePass - Create and return a pass that writes the /// module to the specified raw_ostream. diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 845b523c24216..c55e6267836ac 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -50,7 +50,7 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) { /// Because of the way Type subclasses are allocated, this function is necessary /// to use the correct kind of "delete" operator to deallocate the Type object. 
-/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space +/// Some type objects (FunctionTy, StructTy) allocate additional space /// after the space for their derived type to hold the contained types array of /// PATypeHandles. Using this allocation scheme means all the PATypeHandles are /// allocated with the type object, decreasing allocations and eliminating the @@ -66,8 +66,7 @@ void Type::destroy() const { // Structures and Functions allocate their contained types past the end of // the type object itself. These need to be destroyed differently than the // other types. - if (this->isFunctionTy() || this->isStructTy() || - this->isUnionTy()) { + if (this->isFunctionTy() || this->isStructTy()) { // First, make sure we destruct any PATypeHandles allocated by these // subclasses. They must be manually destructed. for (unsigned i = 0; i < NumContainedTys; ++i) @@ -77,10 +76,10 @@ void Type::destroy() const { // to delete this as an array of char. if (this->isFunctionTy()) static_cast<const FunctionType*>(this)->FunctionType::~FunctionType(); - else if (this->isStructTy()) + else { + assert(isStructTy()); static_cast<const StructType*>(this)->StructType::~StructType(); - else - static_cast<const UnionType*>(this)->UnionType::~UnionType(); + } // Finally, remove the memory as an array deallocation of the chars it was // constructed from. @@ -234,7 +233,7 @@ bool Type::isSizedDerivedType() const { if (const VectorType *PTy = dyn_cast<VectorType>(this)) return PTy->getElementType()->isSized(); - if (!this->isStructTy() && !this->isUnionTy()) + if (!this->isStructTy()) return false; // Okay, our struct is sized if all of the elements are... @@ -319,31 +318,6 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const { } -bool UnionType::indexValid(const Value *V) const { - // Union indexes require 32-bit integer constants. - if (V->getType()->isIntegerTy(32)) - if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) - return indexValid(CU->getZExtValue()); - return false; -} - -bool UnionType::indexValid(unsigned V) const { - return V < NumContainedTys; -} - -// getTypeAtIndex - Given an index value into the type, return the type of the -// element. For a structure type, this must be a constant value... -// -const Type *UnionType::getTypeAtIndex(const Value *V) const { - unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); - return getTypeAtIndex(Idx); -} - -const Type *UnionType::getTypeAtIndex(unsigned Idx) const { - assert(indexValid(Idx) && "Invalid structure index!"); - return ContainedTys[Idx]; -} - //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -455,8 +429,8 @@ const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) { /// isValidReturnType - Return true if the specified type is valid as a return /// type. 
bool FunctionType::isValidReturnType(const Type *RetTy) { - return RetTy->getTypeID() != LabelTyID && - RetTy->getTypeID() != MetadataTyID; + return !RetTy->isFunctionTy() && !RetTy->isLabelTy() && + !RetTy->isMetadataTy(); } /// isValidArgumentType - Return true if the specified type is valid as an @@ -507,23 +481,6 @@ StructType::StructType(LLVMContext &C, setAbstract(isAbstract); } -UnionType::UnionType(LLVMContext &C,const Type* const* Types, unsigned NumTypes) - : CompositeType(C, UnionTyID) { - ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1); - NumContainedTys = NumTypes; - bool isAbstract = false; - for (unsigned i = 0; i < NumTypes; ++i) { - assert(Types[i] && "<null> type for union field!"); - assert(isValidElementType(Types[i]) && - "Invalid type for union element!"); - new (&ContainedTys[i]) PATypeHandle(Types[i], this); - isAbstract |= Types[i]->isAbstract(); - } - - // Calculate whether or not this type is abstract - setAbstract(isAbstract); -} - ArrayType::ArrayType(const Type *ElType, uint64_t NumEl) : SequentialType(ArrayTyID, ElType) { NumElements = NumEl; @@ -603,8 +560,8 @@ namespace llvm { static inline ChildIteratorType child_begin(NodeType *N) { if (N->isAbstract()) return N->subtype_begin(); - else // No need to process children of concrete types. - return N->subtype_end(); + // No need to process children of concrete types. + return N->subtype_end(); } static inline ChildIteratorType child_end(NodeType *N) { return N->subtype_end(); @@ -627,35 +584,35 @@ void Type::PromoteAbstractToConcrete() { // Concrete types are leaves in the tree. Since an SCC will either be all // abstract or all concrete, we only need to check one type. - if (SCC[0]->isAbstract()) { - if (SCC[0]->isOpaqueTy()) - return; // Not going to be concrete, sorry. - - // If all of the children of all of the types in this SCC are concrete, - // then this SCC is now concrete as well. If not, neither this SCC, nor - // any parent SCCs will be concrete, so we might as well just exit. - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - for (Type::subtype_iterator CI = SCC[i]->subtype_begin(), - E = SCC[i]->subtype_end(); CI != E; ++CI) - if ((*CI)->isAbstract()) - // If the child type is in our SCC, it doesn't make the entire SCC - // abstract unless there is a non-SCC abstract type. - if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end()) - return; // Not going to be concrete, sorry. - - // Okay, we just discovered this whole SCC is now concrete, mark it as - // such! - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - assert(SCC[i]->isAbstract() && "Why are we processing concrete types?"); - - SCC[i]->setAbstract(false); - } - - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - assert(!SCC[i]->isAbstract() && "Concrete type became abstract?"); - // The type just became concrete, notify all users! - cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete(); - } + if (!SCC[0]->isAbstract()) continue; + + if (SCC[0]->isOpaqueTy()) + return; // Not going to be concrete, sorry. + + // If all of the children of all of the types in this SCC are concrete, + // then this SCC is now concrete as well. If not, neither this SCC, nor + // any parent SCCs will be concrete, so we might as well just exit. 
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) + for (Type::subtype_iterator CI = SCC[i]->subtype_begin(), + E = SCC[i]->subtype_end(); CI != E; ++CI) + if ((*CI)->isAbstract()) + // If the child type is in our SCC, it doesn't make the entire SCC + // abstract unless there is a non-SCC abstract type. + if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end()) + return; // Not going to be concrete, sorry. + + // Okay, we just discovered this whole SCC is now concrete, mark it as + // such! + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + assert(SCC[i]->isAbstract() && "Why are we processing concrete types?"); + + SCC[i]->setAbstract(false); + } + + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + assert(!SCC[i]->isAbstract() && "Concrete type became abstract?"); + // The type just became concrete, notify all users! + cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete(); } } } @@ -693,11 +650,15 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { const IntegerType *ITy2 = cast<IntegerType>(Ty2); return ITy->getBitWidth() == ITy2->getBitWidth(); - } else if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { + } + + if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { const PointerType *PTy2 = cast<PointerType>(Ty2); return PTy->getAddressSpace() == PTy2->getAddressSpace() && TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes); - } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + } + + if (const StructType *STy = dyn_cast<StructType>(Ty)) { const StructType *STy2 = cast<StructType>(Ty2); if (STy->getNumElements() != STy2->getNumElements()) return false; if (STy->isPacked() != STy2->isPacked()) return false; @@ -705,22 +666,21 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes)) return false; return true; - } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { - const UnionType *UTy2 = cast<UnionType>(Ty2); - if (UTy->getNumElements() != UTy2->getNumElements()) return false; - for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i) - if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes)) - return false; - return true; - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + } + + if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { const ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy->getNumElements() == ATy2->getNumElements() && TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes); - } else if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) { + } + + if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) { const VectorType *PTy2 = cast<VectorType>(Ty2); return PTy->getNumElements() == PTy2->getNumElements() && TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes); - } else if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) { + } + + if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) { const FunctionType *FTy2 = cast<FunctionType>(Ty2); if (FTy->isVarArg() != FTy2->isVarArg() || FTy->getNumParams() != FTy2->getNumParams() || @@ -731,10 +691,10 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, return false; } return true; - } else { - llvm_unreachable("Unknown derived type!"); - return false; } + + llvm_unreachable("Unknown derived type!"); + return false; } namespace llvm { // in namespace llvm so findable by ADL @@ -808,13 +768,13 @@ const IntegerType *IntegerType::get(LLVMContext &C, 
unsigned NumBits) { // Check for the built-in integer types switch (NumBits) { - case 1: return cast<IntegerType>(Type::getInt1Ty(C)); - case 8: return cast<IntegerType>(Type::getInt8Ty(C)); - case 16: return cast<IntegerType>(Type::getInt16Ty(C)); - case 32: return cast<IntegerType>(Type::getInt32Ty(C)); - case 64: return cast<IntegerType>(Type::getInt64Ty(C)); - default: - break; + case 1: return cast<IntegerType>(Type::getInt1Ty(C)); + case 8: return cast<IntegerType>(Type::getInt8Ty(C)); + case 16: return cast<IntegerType>(Type::getInt16Ty(C)); + case 32: return cast<IntegerType>(Type::getInt32Ty(C)); + case 64: return cast<IntegerType>(Type::getInt64Ty(C)); + default: + break; } LLVMContextImpl *pImpl = C.pImpl; @@ -902,8 +862,8 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) { } bool ArrayType::isValidElementType(const Type *ElemTy) { - return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy(); + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { @@ -975,60 +935,6 @@ bool StructType::isValidElementType(const Type *ElemTy) { //===----------------------------------------------------------------------===// -// Union Type Factory... -// - -UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) { - assert(NumTypes > 0 && "union must have at least one member type!"); - UnionValType UTV(Types, NumTypes); - UnionType *UT = 0; - - LLVMContextImpl *pImpl = Types[0]->getContext().pImpl; - - UT = pImpl->UnionTypes.get(UTV); - - if (!UT) { - // Value not found. Derive a new type! - UT = (UnionType*) operator new(sizeof(UnionType) + - sizeof(PATypeHandle) * NumTypes); - new (UT) UnionType(Types[0]->getContext(), Types, NumTypes); - pImpl->UnionTypes.add(UTV, UT); - } -#ifdef DEBUG_MERGE_TYPES - DEBUG(dbgs() << "Derived new type: " << *UT << "\n"); -#endif - return UT; -} - -UnionType *UnionType::get(const Type *type, ...) { - va_list ap; - SmallVector<const llvm::Type*, 8> UnionFields; - va_start(ap, type); - while (type) { - UnionFields.push_back(type); - type = va_arg(ap, llvm::Type*); - } - unsigned NumTypes = UnionFields.size(); - assert(NumTypes > 0 && "union must have at least one member type!"); - return llvm::UnionType::get(&UnionFields[0], NumTypes); -} - -bool UnionType::isValidElementType(const Type *ElemTy) { - return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); -} - -int UnionType::getElementTypeIndex(const Type *ElemTy) const { - int index = 0; - for (UnionType::element_iterator I = element_begin(), E = element_end(); - I != E; ++I, ++index) { - if (ElemTy == *I) return index; - } - - return -1; -} - -//===----------------------------------------------------------------------===// // Pointer Type Factory... 
//
@@ -1060,9 +966,8 @@ const PointerType *Type::getPointerTo(unsigned addrs) const {
 }
 
 bool PointerType::isValidElementType(const Type *ElemTy) {
-  return ElemTy->getTypeID() != VoidTyID &&
-         ElemTy->getTypeID() != LabelTyID &&
-         ElemTy->getTypeID() != MetadataTyID;
+  return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+         !ElemTy->isMetadataTy();
 }
 
 
@@ -1071,8 +976,7 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
 //
 
 OpaqueType *OpaqueType::get(LLVMContext &C) {
-  OpaqueType *OT = new OpaqueType(C);           // All opaque types are distinct
-  
+  OpaqueType *OT = new OpaqueType(C);        // All opaque types are distinct.
   LLVMContextImpl *pImpl = C.pImpl;
   pImpl->OpaqueTypes.insert(OT);
   return OT;
@@ -1123,18 +1027,17 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
           << ">[" << (void*)this << "]" << "\n");
 #endif
-  this->destroy();
+    this->destroy();
   }
-  
 }
 
-// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// refineAbstractTypeTo - This function is used when it is discovered
 // that the 'this' abstract type is actually equivalent to the NewType
 // specified. This causes all users of 'this' to switch to reference the more
-// concrete type NewType and for 'this' to be deleted. Only used for internal
-// callers.
+// concrete type NewType and for 'this' to be deleted. It is now the single
+// entry point for both internal and external callers.
 //
-void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
   assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
   assert(this != NewType && "Can't refine to myself!");
   assert(ForwardType == 0 && "This type has already been refined!");
@@ -1199,15 +1102,6 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
   // destroyed.
 }
 
-// refineAbstractTypeTo - This function is used by external callers to notify
-// us that this abstract type is equivalent to another type.
-//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
-  // All recursive calls will go through unlockedRefineAbstractTypeTo,
-  // to avoid deadlock problems.
-  unlockedRefineAbstractTypeTo(NewType);
-}
-
 // notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
 // the current type has transitioned from being abstract to being concrete.
 //
@@ -1291,21 +1185,6 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
 // concrete - this could potentially change us from an abstract type to a
 // concrete type.
 //
-void UnionType::refineAbstractType(const DerivedType *OldType,
-                                   const Type *NewType) {
-  LLVMContextImpl *pImpl = OldType->getContext().pImpl;
-  pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType);
-}
-
-void UnionType::typeBecameConcrete(const DerivedType *AbsTy) {
-  LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
-  pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy);
-}
-
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
 void PointerType::refineAbstractType(const DerivedType *OldType,
                                      const Type *NewType) {
   LLVMContextImpl *pImpl = OldType->getContext().pImpl;
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 02ab1135b32cf..5a90917977b02 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -180,32 +180,6 @@ public:
   }
 };
 
-// UnionValType - Define a class to hold the key that goes into the TypeMap
-//
-class UnionValType {
-  std::vector<const Type*> ElTypes;
-public:
-  UnionValType(const Type* const* Types, unsigned NumTypes)
-    : ElTypes(&Types[0], &Types[NumTypes]) {}
-
-  static UnionValType get(const UnionType *UT) {
-    std::vector<const Type *> ElTypes;
-    ElTypes.reserve(UT->getNumElements());
-    for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i)
-      ElTypes.push_back(UT->getElementType(i));
-
-    return UnionValType(&ElTypes[0], ElTypes.size());
-  }
-
-  static unsigned hashTypeStructure(const UnionType *UT) {
-    return UT->getNumElements();
-  }
-
-  inline bool operator<(const UnionValType &UTV) const {
-    return (ElTypes < UTV.ElTypes);
-  }
-};
-
 // FunctionValType - Define a class to hold the key that goes into the TypeMap
 //
 class FunctionValType {
@@ -370,7 +344,7 @@ public:
       // We already have this type in the table.  Get rid of the newly refined
      // type.
      TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-      Ty->unlockedRefineAbstractTypeTo(NewTy);
+      Ty->refineAbstractTypeTo(NewTy);
      return;
    }
  } else {
@@ -385,31 +359,33 @@ public:
      if (I->second == Ty) {
        // Remember the position of the old type if we see it in our scan.
        Entry = I;
+        continue;
+      }
+
+      if (!TypesEqual(Ty, I->second))
+        continue;
+
+      TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+
+      // Remove the old entry from TypesByHash.  If the hash values differ
+      // now, remove it from the old place.  Otherwise, continue scanning
+      // within this hashcode to reduce work.
+      if (NewTypeHash != OldTypeHash) {
+        RemoveFromTypesByHash(OldTypeHash, Ty);
      } else {
-        if (TypesEqual(Ty, I->second)) {
-          TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
-          // Remove the old entry form TypesByHash.  If the hash values differ
-          // now, remove it from the old place.  Otherwise, continue scanning
-          // withing this hashcode to reduce work.
-          if (NewTypeHash != OldTypeHash) {
-            RemoveFromTypesByHash(OldTypeHash, Ty);
-          } else {
-            if (Entry == E) {
-              // Find the location of Ty in the TypesByHash structure if we
-              // haven't seen it already.
-              while (I->second != Ty) {
-                ++I;
-                assert(I != E && "Structure doesn't contain type??");
-              }
-              Entry = I;
-            }
-            TypesByHash.erase(Entry);
+        if (Entry == E) {
+          // Find the location of Ty in the TypesByHash structure if we
+          // haven't seen it already.
+          while (I->second != Ty) {
+            ++I;
+            assert(I != E && "Structure doesn't contain type??");
+          }
+          Entry = I;
+        }
+        TypesByHash.erase(Entry);
+      }
+      Ty->refineAbstractTypeTo(NewTy);
+      return;
    }
 
    // If there is no existing type of the same structure, we reinsert an
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index b7fd92f9b0660..fec710b39459b 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -86,14 +86,27 @@ const Use *Use::getImpliedUser() const {
 //===----------------------------------------------------------------------===//
 
 Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
+  while (Done < 20) {
+    if (Start == Stop--)
+      return Start;
+    static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+                                         oneDigitTag, oneDigitTag, stopTag,
+                                         zeroDigitTag, oneDigitTag, oneDigitTag,
+                                         stopTag, zeroDigitTag, oneDigitTag,
+                                         zeroDigitTag, oneDigitTag, stopTag,
+                                         oneDigitTag, oneDigitTag, oneDigitTag,
+                                         oneDigitTag, stopTag
+                                       };
+    Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(tags[Done++]));
+    Stop->Val = 0;
+  }
+
   ptrdiff_t Count = Done;
   while (Start != Stop) {
     --Stop;
     Stop->Val = 0;
     if (!Count) {
-      Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Done == 0
-                                                            ? fullStopTag
-                                                            : stopTag));
+      Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(stopTag));
       ++Done;
       Count = Done;
     } else {
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 585edf09c9e5a..b8c677565467a 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -139,10 +139,6 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
   } else if (Argument *A = dyn_cast<Argument>(V)) {
     if (Function *P = A->getParent())
       ST = &P->getValueSymbolTable();
-  } else if (NamedMDNode *N = dyn_cast<NamedMDNode>(V)) {
-    if (Module *P = N->getParent()) {
-      ST = &P->getValueSymbolTable();
-    }
   } else if (isa<MDString>(V))
     return true;
   else {
@@ -492,10 +488,15 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
   ValueHandleBase *Entry = pImpl->ValueHandles[V];
   assert(Entry && "Value bit set but no entries exist");
 
-  // We use a local ValueHandleBase as an iterator so that
-  // ValueHandles can add and remove themselves from the list without
-  // breaking our iteration.  This is not really an AssertingVH; we
-  // just have to give ValueHandleBase some kind.
+  // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+  // and remove themselves from the list without breaking our iteration.  This
+  // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+  // Note that we deliberately do not support the case when dropping a value
+  // handle results in a new value handle being permanently added to the list
+  // (as might occur in theory for CallbackVH's): the new value handle will not
+  // be processed and the checking code will mete out righteous punishment if
+  // the handle is still present once we have finished processing all the other
+  // value handles (it is fine to momentarily add then remove a value handle).
   for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
     Iterator.RemoveFromUseList();
     Iterator.AddToExistingUseListAfter(Entry);
@@ -576,6 +577,24 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
       break;
     }
   }
+
+#ifndef NDEBUG
+  // If any new tracking or weak value handles were added while processing the
+  // list, then complain about it now.
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 585edf09c9e5a..b8c677565467a 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -139,10 +139,6 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
   } else if (Argument *A = dyn_cast<Argument>(V)) {
     if (Function *P = A->getParent())
       ST = &P->getValueSymbolTable();
-  } else if (NamedMDNode *N = dyn_cast<NamedMDNode>(V)) {
-    if (Module *P = N->getParent()) {
-      ST = &P->getValueSymbolTable();
-    }
   } else if (isa<MDString>(V))
     return true;
   else {
@@ -492,10 +488,15 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
   ValueHandleBase *Entry = pImpl->ValueHandles[V];
   assert(Entry && "Value bit set but no entries exist");
 
-  // We use a local ValueHandleBase as an iterator so that
-  // ValueHandles can add and remove themselves from the list without
-  // breaking our iteration.  This is not really an AssertingVH; we
-  // just have to give ValueHandleBase some kind.
+  // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+  // and remove themselves from the list without breaking our iteration.  This
+  // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+  // Note that we deliberately do not support the case when dropping a value
+  // handle results in a new value handle being permanently added to the list
+  // (as might occur in theory for CallbackVH's): the new value handle will not
+  // be processed and the checking code will mete out righteous punishment if
+  // the handle is still present once we have finished processing all the other
+  // value handles (it is fine to momentarily add then remove a value handle).
   for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
     Iterator.RemoveFromUseList();
     Iterator.AddToExistingUseListAfter(Entry);
@@ -576,6 +577,24 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
       break;
     }
   }
+
+#ifndef NDEBUG
+  // If any new tracking or weak value handles were added while processing the
+  // list, then complain about it now.
+  if (Old->HasValueHandle)
+    for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+      switch (Entry->getKind()) {
+      case Tracking:
+      case Weak:
+        dbgs() << "After RAUW from " << *Old->getType() << " %"
+               << Old->getNameStr() << " to " << *New->getType() << " %"
+               << New->getNameStr() << "\n";
+        llvm_unreachable("A tracking or weak value handle still pointed to the"
+                         " old value!\n");
+      default:
+        break;
+      }
+#endif
 }
 
 /// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 449d61a2cbb18..254bf06439d9b 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -115,5 +115,3 @@ void ValueSymbolTable::dump() const {
     //DEBUG(dbgs() << "\n");
   }
 }
-
-MDSymbolTable::~MDSymbolTable() { }
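The new #ifndef NDEBUG block above turns an implicit contract into a hard check: after replaceAllUsesWith, no Weak or Tracking handle may still point at the old value, because ValueIsRAUWd retargets them as it walks the handle list. A small usage sketch of that contract, assuming the in-tree headers of this vintage (llvm/Support/ValueHandle.h):

#include "llvm/Instructions.h"
#include "llvm/Support/ValueHandle.h"

using namespace llvm;

// Old and New must have the same type; Old must live in a basic block.
void replaceAndCheck(Instruction *Old, Value *New) {
  WeakVH Handle(Old);              // Registers on Old's value-handle list.
  Old->replaceAllUsesWith(New);    // Fires ValueHandleBase::ValueIsRAUWd.
  assert(Handle == New && "weak handles follow RAUW to the new value");
  Old->eraseFromParent();          // Deleting Old is now safe: Handle was
                                   // already retargeted, so the
                                   // ValueIsDeleted walk won't touch it.
}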
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index f97699dabd890..e3ecc979bf128 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -72,7 +72,7 @@ namespace {  // Anonymous namespace for class
   struct PreVerifier : public FunctionPass {
     static char ID; // Pass ID, replacement for typeid
 
-    PreVerifier() : FunctionPass(&ID) { }
+    PreVerifier() : FunctionPass(ID) { }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -102,9 +102,9 @@ namespace {  // Anonymous namespace for class
 }
 
 char PreVerifier::ID = 0;
-static RegisterPass<PreVerifier>
-PreVer("preverify", "Preliminary module verification");
-static const PassInfo *const PreVerifyID = &PreVer;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+                false, false);
+char &PreVerifyID = PreVerifier::ID;
 
 namespace {
   class TypeSet : public AbstractTypeUser {
@@ -182,23 +182,13 @@ namespace {
     SmallPtrSet<MDNode *, 32> MDNodes;
 
     Verifier()
-      : FunctionPass(&ID),
+      : FunctionPass(ID),
       Broken(false), RealPass(true), action(AbortProcessAction),
       Mod(0), Context(0), DT(0), MessagesStr(Messages) {}
     explicit Verifier(VerifierFailureAction ctn)
-      : FunctionPass(&ID),
+      : FunctionPass(ID),
       Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
       MessagesStr(Messages) {}
-    explicit Verifier(bool AB)
-      : FunctionPass(&ID),
-      Broken(false), RealPass(true),
-      action( AB ? AbortProcessAction : PrintMessageAction), Mod(0),
-      Context(0), DT(0), MessagesStr(Messages) {}
-    explicit Verifier(DominatorTree &dt)
-      : FunctionPass(&ID),
-      Broken(false), RealPass(false), action(PrintMessageAction), Mod(0),
-      Context(0), DT(&dt), MessagesStr(Messages) {}
-
     bool doInitialization(Module &M) {
       Mod = &M;
@@ -331,6 +321,7 @@ namespace {
     void visitBranchInst(BranchInst &BI);
     void visitReturnInst(ReturnInst &RI);
     void visitSwitchInst(SwitchInst &SI);
+    void visitIndirectBrInst(IndirectBrInst &BI);
    void visitSelectInst(SelectInst &SI);
     void visitUserOp1(Instruction &I);
     void visitUserOp2(Instruction &I) { visitUserOp1(I); }
@@ -402,7 +393,7 @@ namespace {
 } // End anonymous namespace
 
 char Verifier::ID = 0;
-static RegisterPass<Verifier> X("verify", "Module Verifier");
+INITIALIZE_PASS(Verifier, "verify", "Module Verifier", false, false);
 
 // Assert - We know that cond should be true, if not print an error message.
 #define Assert(C, M) \
@@ -445,6 +436,10 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
     Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
             "Only global arrays can have appending linkage!", GVar);
   }
+
+  Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(),
+          "linker_private_weak_def_auto can only have default visibility!",
+          &GV);
 }
 
 void Verifier::visitGlobalVariable(GlobalVariable &GV) {
@@ -504,8 +499,8 @@ void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
     if (!MD)
       continue;
 
-    Assert2(!MD->isFunctionLocal(),
-            "Named metadata operand cannot be function local!", &NMD, MD);
+    Assert1(!MD->isFunctionLocal(),
+            "Named metadata operand cannot be function local!", MD);
     visitMDNode(*MD, 0);
   }
 }
@@ -520,7 +515,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
     Value *Op = MD.getOperand(i);
     if (!Op)
       continue;
-    if (isa<Constant>(Op) || isa<MDString>(Op) || isa<NamedMDNode>(Op))
+    if (isa<Constant>(Op) || isa<MDString>(Op))
      continue;
     if (MDNode *N = dyn_cast<MDNode>(Op)) {
       Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
@@ -864,6 +859,16 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
   visitTerminatorInst(SI);
 }
 
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+  Assert1(BI.getAddress()->getType()->isPointerTy(),
+          "Indirectbr operand must have pointer type!", &BI);
+  for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+    Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+            "Indirectbr destinations must all have label type!", &BI);
+
+  visitTerminatorInst(BI);
+}
+
 void Verifier::visitSelectInst(SelectInst &SI) {
   Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
                                           SI.getOperand(2)),
@@ -1202,6 +1207,7 @@ void Verifier::visitCallInst(CallInst &CI) {
 
 void Verifier::visitInvokeInst(InvokeInst &II) {
   VerifyCallSite(&II);
+  visitTerminatorInst(II);
 }
 
 /// visitBinaryOperator - Check that both arguments to the binary operator are
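With visitIndirectBrInst wired into the visitor (see the hunk above), indirectbr is now checked like any other terminator: its address operand must be a pointer and every destination a label. A sketch of building a trivially valid indirectbr and running the checks, assuming the 2.8-era C++ API (llvm/Analysis/Verifier.h and friends):

#include "llvm/Analysis/Verifier.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Type.h"

using namespace llvm;

// Builds: define void @f() { entry: indirectbr i8* blockaddress(@f, %done),
// [label %done]  done: ret void } and verifies it.
bool buildAndVerify() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Function *F = cast<Function>(
      M.getOrInsertFunction("f", Type::getVoidTy(Ctx), (Type *)0));
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Done = BasicBlock::Create(Ctx, "done", F);
  ReturnInst::Create(Ctx, Done);
  IndirectBrInst *IBI =
      IndirectBrInst::Create(BlockAddress::get(F, Done), 1, Entry);
  IBI->addDestination(Done);
  // verifyModule returns true if the module is broken; the new
  // visitIndirectBrInst assertions fire on a non-pointer address or a
  // non-label destination.
  return !verifyModule(M, ReturnStatusAction);
}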
@@ -1266,28 +1272,37 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
   visitInstruction(B);
 }
 
-void Verifier::visitICmpInst(ICmpInst& IC) {
+void Verifier::visitICmpInst(ICmpInst &IC) {
   // Check that the operands are the same type
-  const Type* Op0Ty = IC.getOperand(0)->getType();
-  const Type* Op1Ty = IC.getOperand(1)->getType();
+  const Type *Op0Ty = IC.getOperand(0)->getType();
+  const Type *Op1Ty = IC.getOperand(1)->getType();
   Assert1(Op0Ty == Op1Ty,
           "Both operands to ICmp instruction are not of the same type!", &IC);
   // Check that the operands are the right type
   Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
           "Invalid operand types for ICmp instruction", &IC);
+  // Check that the predicate is valid.
+  Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+          IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+          "Invalid predicate in ICmp instruction!", &IC);
 
   visitInstruction(IC);
 }
 
-void Verifier::visitFCmpInst(FCmpInst& FC) {
+void Verifier::visitFCmpInst(FCmpInst &FC) {
   // Check that the operands are the same type
-  const Type* Op0Ty = FC.getOperand(0)->getType();
-  const Type* Op1Ty = FC.getOperand(1)->getType();
+  const Type *Op0Ty = FC.getOperand(0)->getType();
+  const Type *Op1Ty = FC.getOperand(1)->getType();
   Assert1(Op0Ty == Op1Ty,
           "Both operands to FCmp instruction are not of the same type!", &FC);
   // Check that the operands are the right type
   Assert1(Op0Ty->isFPOrFPVectorTy(),
           "Invalid operand types for FCmp instruction", &FC);
+  // Check that the predicate is valid.
+  Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+          FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+          "Invalid predicate in FCmp instruction!", &FC);
+
   visitInstruction(FC);
 }
 
@@ -1310,27 +1325,6 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
   Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
                                              SV.getOperand(2)),
           "Invalid shufflevector operands!", &SV);
-
-  const VectorType *VTy = dyn_cast<VectorType>(SV.getOperand(0)->getType());
-  Assert1(VTy, "Operands are not a vector type", &SV);
-
-  // Check to see if Mask is valid.
-  if (const ConstantVector *MV = dyn_cast<ConstantVector>(SV.getOperand(2))) {
-    for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
-      if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
-        Assert1(!CI->uge(VTy->getNumElements()*2),
-                "Invalid shufflevector shuffle mask!", &SV);
-      } else {
-        Assert1(isa<UndefValue>(MV->getOperand(i)),
-                "Invalid shufflevector shuffle mask!", &SV);
-      }
-    }
-  } else {
-    Assert1(isa<UndefValue>(SV.getOperand(2)) ||
-            isa<ConstantAggregateZero>(SV.getOperand(2)),
-            "Invalid shufflevector shuffle mask!", &SV);
-  }
-
   visitInstruction(SV);
 }
 
@@ -1408,10 +1402,6 @@ void Verifier::visitInstruction(Instruction &I) {
             "Only PHI nodes may reference their own value!", &I);
   }
 
-  // Verify that if this is a terminator that it is at the end of the block.
-  if (isa<TerminatorInst>(I))
-    Assert1(BB->getTerminator() == &I, "Terminator not at end of block!", &I);
-
   // Check that void typed values don't have names
   Assert1(!I.getType()->isVoidTy() || !I.hasName(),
           "Instruction has a name, but provides a void value!", &I);
@@ -1570,7 +1560,8 @@ void Verifier::VerifyType(const Type *Ty) {
               "Function type with invalid parameter type", ElTy, FTy);
       VerifyType(ElTy);
     }
-  } break;
+    break;
+  }
   case Type::StructTyID: {
     const StructType *STy = cast<StructType>(Ty);
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -1579,34 +1570,29 @@ void Verifier::VerifyType(const Type *Ty) {
       const Type *ElTy = STy->getElementType(i);
       Assert2(StructType::isValidElementType(ElTy),
               "Structure type with invalid element type", ElTy, STy);
       VerifyType(ElTy);
     }
-  } break;
-  case Type::UnionTyID: {
-    const UnionType *UTy = cast<UnionType>(Ty);
-    for (unsigned i = 0, e = UTy->getNumElements(); i != e; ++i) {
-      const Type *ElTy = UTy->getElementType(i);
-      Assert2(UnionType::isValidElementType(ElTy),
-              "Union type with invalid element type", ElTy, UTy);
-      VerifyType(ElTy);
-    }
-  } break;
+    break;
+  }
   case Type::ArrayTyID: {
     const ArrayType *ATy = cast<ArrayType>(Ty);
     Assert1(ArrayType::isValidElementType(ATy->getElementType()),
             "Array type with invalid element type", ATy);
     VerifyType(ATy->getElementType());
-  } break;
+    break;
+  }
   case Type::PointerTyID: {
     const PointerType *PTy = cast<PointerType>(Ty);
     Assert1(PointerType::isValidElementType(PTy->getElementType()),
             "Pointer type with invalid element type", PTy);
     VerifyType(PTy->getElementType());
-  } break;
+    break;
+  }
  case Type::VectorTyID: {
     const VectorType *VTy = cast<VectorType>(Ty);
     Assert1(VectorType::isValidElementType(VTy->getElementType()),
             "Vector type with invalid element type", VTy);
     VerifyType(VTy->getElementType());
-  } break;
+    break;
+  }
   default:
     break;
   }
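The mask-walking loop deleted from visitShuffleVectorInst above did not disappear; equivalent checking sits behind ShuffleVectorInst::isValidOperands, the same predicate IR construction uses, so the verifier and the builders can no longer drift apart. A sketch of probing a mask through that single entry point (assumes the 2.8-era constants API; the helper name is illustrative):

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include <vector>

using namespace llvm;

// Builds the <4 x i32> mask <0, 5, undef, 2> and asks isValidOperands to vet
// it. For two <4 x T> sources, lanes 0-3 select from the first vector and
// lanes 4-7 from the second, so 8 would be the first out-of-range index.
bool exampleMaskIsValid(LLVMContext &Ctx, Value *A, Value *B) {
  const Type *I32 = Type::getInt32Ty(Ctx);
  std::vector<Constant*> Lanes;
  Lanes.push_back(ConstantInt::get(I32, 0));
  Lanes.push_back(ConstantInt::get(I32, 5));
  Lanes.push_back(UndefValue::get(I32));
  Lanes.push_back(ConstantInt::get(I32, 2));
  return ShuffleVectorInst::isValidOperands(A, B, ConstantVector::get(Lanes));
}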
@@ -1832,8 +1818,13 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
   // and iPTR. In the verifier, we can not distinguish which case we have so
   // allow either case to be legal.
   if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
-    Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
-      EVT::getEVT(PTyp->getElementType()).getEVTString();
+    EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
+    if (PointeeVT == MVT::Other) {
+      CheckFailed("Intrinsic has pointer to complex type.");
+      return false;
+    }
+    Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
+              PointeeVT.getEVTString();
   } else {
     CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
                 "pointer and a pointer is required.", F);
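For context on what the MVT::Other guard above prevents: the suffix being assembled is the name mangling for overloaded intrinsics, where an i8 addrspace(1)* operand contributes ".p1i8"; previously, a pointee with no simple value type would assert inside EVT::getEVT instead of producing a verifier diagnostic. A sketch of the same construction in isolation (assumes the 2.8-era EVT API from llvm/CodeGen/ValueTypes.h; pointerSuffix is an illustrative helper, not in tree):

#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/DerivedTypes.h"
#include <string>

using namespace llvm;

// Returns e.g. ".p1i8" for i8 addrspace(1)*, or "" when the pointee has no
// simple EVT -- the case the verifier now rejects instead of asserting.
std::string pointerSuffix(const PointerType *PTy) {
  EVT PointeeVT = EVT::getEVT(PTy->getElementType(), /*HandleUnknown=*/true);
  if (PointeeVT == MVT::Other)
    return std::string();  // Caller reports "pointer to complex type".
  return ".p" + utostr(PTy->getAddressSpace()) + PointeeVT.getEVTString();
}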