| author | Dimitry Andric <dim@FreeBSD.org> | 2015-08-07 23:01:33 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-08-07 23:01:33 +0000 |
| commit | ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch) | |
| tree | 52d1861acda1205241ee35a94aa63129c604d469 /lib | |
| parent | 1a82d4c088707c791c792f6822f611b47a12bdfe (diff) | |
Diffstat (limited to 'lib')
412 files changed, 16888 insertions, 6713 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index ad0727a0e0e5..44d137dffd22 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -71,11 +71,6 @@ void AliasAnalysis::deleteValue(Value *V) { AA->deleteValue(V); } -void AliasAnalysis::copyValue(Value *From, Value *To) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - AA->copyValue(From, To); -} - void AliasAnalysis::addEscapingUse(Use &U) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); AA->addEscapingUse(U); diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index 1ef49fc02fef..e5107b3bc827 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -124,10 +124,6 @@ namespace { assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); AliasAnalysis::deleteValue(V); } - void copyValue(Value *From, Value *To) override { - Vals.insert(To); - AliasAnalysis::copyValue(From, To); - } }; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index bf8cda1ffaec..54d0f4304e1f 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -544,9 +544,6 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { // the tracker already knows about a value, it will ignore the request. // void AliasSetTracker::copyValue(Value *From, Value *To) { - // Notify the alias analysis implementation that this value is copied. - AA.copyValue(From, To); - // First, look up the PointerRec for this pointer. PointerMapType::iterator I = PointerMap.find_as(From); if (I == PointerMap.end()) diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 8e812252fdfe..68f766edb301 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -685,6 +685,9 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { if (CS.onlyReadsMemory()) Min = OnlyReadsMemory; + if (CS.onlyAccessesArgMemory()) + Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + // The AliasAnalysis base class has some smarts, lets use them. 
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); } @@ -710,6 +713,9 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { if (F->onlyReadsMemory()) Min = OnlyReadsMemory; + if (F->onlyAccessesArgMemory()) + Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (isMemsetPattern16(F, TLI)) diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 2f4c6a92f9af..02a5aef03223 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1234,6 +1234,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::bswap: @@ -1450,6 +1452,10 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFoldFP(floor, V, Ty); case Intrinsic::ceil: return ConstantFoldFP(ceil, V, Ty); + case Intrinsic::sin: + return ConstantFoldFP(sin, V, Ty); + case Intrinsic::cos: + return ConstantFoldFP(cos, V, Ty); } if (!TLI) diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f1ddde252924..18d45dd6a396 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -42,94 +42,111 @@ STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); namespace { - /// FunctionRecord - One instance of this structure is stored for every - /// function in the program. Later, the entries for these functions are - /// removed if the function is found to call an external function (in which - /// case we know nothing about it. - struct FunctionRecord { - /// GlobalInfo - Maintain mod/ref info for all of the globals without - /// addresses taken that are read or written (transitively) by this - /// function. - std::map<const GlobalValue*, unsigned> GlobalInfo; - - /// MayReadAnyGlobal - May read global variables, but it is not known which. - bool MayReadAnyGlobal; - - unsigned getInfoForGlobal(const GlobalValue *GV) const { - unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; - std::map<const GlobalValue*, unsigned>::const_iterator I = +/// FunctionRecord - One instance of this structure is stored for every +/// function in the program. Later, the entries for these functions are +/// removed if the function is found to call an external function (in which +/// case we know nothing about it. +struct FunctionRecord { + /// GlobalInfo - Maintain mod/ref info for all of the globals without + /// addresses taken that are read or written (transitively) by this + /// function. + std::map<const GlobalValue *, unsigned> GlobalInfo; + + /// MayReadAnyGlobal - May read global variables, but it is not known which. + bool MayReadAnyGlobal; + + unsigned getInfoForGlobal(const GlobalValue *GV) const { + unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; + std::map<const GlobalValue *, unsigned>::const_iterator I = GlobalInfo.find(GV); - if (I != GlobalInfo.end()) - Effect |= I->second; - return Effect; - } + if (I != GlobalInfo.end()) + Effect |= I->second; + return Effect; + } - /// FunctionEffect - Capture whether or not this function reads or writes to - /// ANY memory. If not, we can do a lot of aggressive analysis on it. 
- unsigned FunctionEffect; + /// FunctionEffect - Capture whether or not this function reads or writes to + /// ANY memory. If not, we can do a lot of aggressive analysis on it. + unsigned FunctionEffect; - FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {} - }; + FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {} +}; - /// GlobalsModRef - The actual analysis pass. - class GlobalsModRef : public ModulePass, public AliasAnalysis { - /// NonAddressTakenGlobals - The globals that do not have their addresses - /// taken. - std::set<const GlobalValue*> NonAddressTakenGlobals; +/// GlobalsModRef - The actual analysis pass. +class GlobalsModRef : public ModulePass, public AliasAnalysis { + /// NonAddressTakenGlobals - The globals that do not have their addresses + /// taken. + std::set<const GlobalValue *> NonAddressTakenGlobals; - /// IndirectGlobals - The memory pointed to by this global is known to be - /// 'owned' by the global. - std::set<const GlobalValue*> IndirectGlobals; + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + std::set<const GlobalValue *> IndirectGlobals; - /// AllocsForIndirectGlobals - If an instruction allocates memory for an - /// indirect global, this map indicates which one. - std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals; - /// FunctionInfo - For each function, keep track of what globals are - /// modified or read. - std::map<const Function*, FunctionRecord> FunctionInfo; + /// FunctionInfo - For each function, keep track of what globals are + /// modified or read. + std::map<const Function *, FunctionRecord> FunctionInfo; - public: - static char ID; - GlobalsModRef() : ModulePass(ID) { - initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); - } +public: + static char ID; + GlobalsModRef() : ModulePass(ID) { + initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); + } - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); + bool runOnModule(Module &M) override { + InitializeAliasAnalysis(this, &M.getDataLayout()); - // Find non-addr taken globals. - AnalyzeGlobals(M); + // Find non-addr taken globals. + AnalyzeGlobals(M); - // Propagate on CG. - AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); - return false; - } + // Propagate on CG. 
+ AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); + return false; + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<CallGraphWrapperPass>(); - AU.setPreservesAll(); // Does not transform code - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<CallGraphWrapperPass>(); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override; + ModRefResult getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) override; + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { + return AliasAnalysis::getModRefInfo(CS1, CS2); + } - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1, CS2); + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(const Function *F) override { + ModRefBehavior Min = UnknownModRefBehavior; + + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; } - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(const Function *F) override { - ModRefBehavior Min = UnknownModRefBehavior; + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { + ModRefBehavior Min = UnknownModRefBehavior; + if (const Function *F = CS.getCalledFunction()) if (FunctionRecord *FR = getFunctionInfo(F)) { if (FR->FunctionEffect == 0) Min = DoesNotAccessMemory; @@ -137,68 +154,50 @@ namespace { Min = OnlyReadsMemory; } - return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. 
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (const Function* F = CS.getCalledFunction()) - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + } - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); - } + void deleteValue(Value *V) override; + void addEscapingUse(Use &U) override; + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + void *getAdjustedAnalysisPointer(AnalysisID PI) override { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis *)this; + return this; + } - void deleteValue(Value *V) override; - void copyValue(Value *From, Value *To) override; - void addEscapingUse(Use &U) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - /// getFunctionInfo - Return the function info for the function, or null if - /// we don't have anything useful to say about it. - FunctionRecord *getFunctionInfo(const Function *F) { - std::map<const Function*, FunctionRecord>::iterator I = +private: + /// getFunctionInfo - Return the function info for the function, or null if + /// we don't have anything useful to say about it. 
+ FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function *, FunctionRecord>::iterator I = FunctionInfo.find(F); - if (I != FunctionInfo.end()) - return &I->second; - return nullptr; - } + if (I != FunctionInfo.end()) + return &I->second; + return nullptr; + } - void AnalyzeGlobals(Module &M); - void AnalyzeCallGraph(CallGraph &CG, Module &M); - bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, - std::vector<Function*> &Writers, - GlobalValue *OkayStoreDest = nullptr); - bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); - }; + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers, + std::vector<Function *> &Writers, + GlobalValue *OkayStoreDest = nullptr); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); +}; } char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, - "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false) +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", + "Simple mod/ref analysis for globals", false, true, + false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, - "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", + "Simple mod/ref analysis for globals", false, true, + false) Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } @@ -207,7 +206,7 @@ Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } /// (really, their address passed to something nontrivial), record this fact, /// and record the functions that they are used directly in. void GlobalsModRef::AnalyzeGlobals(Module &M) { - std::vector<Function*> Readers, Writers; + std::vector<Function *> Readers, Writers; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { @@ -215,11 +214,12 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { NonAddressTakenGlobals.insert(I); ++NumNonAddrTakenFunctions; } - Readers.clear(); Writers.clear(); + Readers.clear(); + Writers.clear(); } - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { // Remember that we are tracking this global, and the mod/ref fns @@ -228,7 +228,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { for (unsigned i = 0, e = Readers.size(); i != e; ++i) FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - if (!I->isConstant()) // No need to keep track of writers to constants + if (!I->isConstant()) // No need to keep track of writers to constants for (unsigned i = 0, e = Writers.size(); i != e; ++i) FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; ++NumNonAddrTakenGlobalVars; @@ -238,7 +238,8 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { AnalyzeIndirectGlobalMemory(I)) ++NumIndirectGlobalVars; } - Readers.clear(); Writers.clear(); + Readers.clear(); + Writers.clear(); } } @@ -249,10 +250,11 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { /// /// If OkayStoreDest is non-null, stores into this global are allowed. 
bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, - std::vector<Function*> &Readers, - std::vector<Function*> &Writers, + std::vector<Function *> &Readers, + std::vector<Function *> &Writers, GlobalValue *OkayStoreDest) { - if (!V->getType()->isPointerTy()) return true; + if (!V->getType()->isPointerTy()) + return true; for (Use &U : V->uses()) { User *I = U.getUser(); @@ -262,7 +264,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, if (V == SI->getOperand(1)) { Writers.push_back(SI->getParent()->getParent()); } else if (SI->getOperand(1) != OkayStoreDest) { - return true; // Storing the pointer + return true; // Storing the pointer } } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { if (AnalyzeUsesOfPointer(I, Readers, Writers)) @@ -282,7 +284,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) - return true; // Allow comparison against null. + return true; // Allow comparison against null. } else { return true; } @@ -301,7 +303,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Keep track of values related to the allocation of the memory, f.e. the // value produced by the malloc call and any casts. - std::vector<Value*> AllocRelatedValues; + std::vector<Value *> AllocRelatedValues; // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. @@ -310,13 +312,14 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // The pointer loaded from the global can only be used in simple ways: // we allow addressing of it and loading storing to it. We do *not* allow // storing the loaded pointer somewhere else or passing to a function. - std::vector<Function*> ReadersWriters; + std::vector<Function *> ReadersWriters; if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) - return false; // Loaded pointer escapes. + return false; // Loaded pointer escapes. // TODO: Could try some IP mod/ref of the loaded pointer. } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // Storing the global itself. - if (SI->getOperand(0) == GV) return false; + if (SI->getOperand(0) == GV) + return false; // If storing the null pointer, ignore it. if (isa<ConstantPointerNull>(SI->getOperand(0))) @@ -327,13 +330,13 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { GV->getParent()->getDataLayout()); if (!isAllocLikeFn(Ptr, TLI)) - return false; // Too hard to analyze. + return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a // non-simple way (e.g. stored to another global) bail out. - std::vector<Function*> ReadersWriters; + std::vector<Function *> ReadersWriters; if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) - return false; // Loaded pointer escapes. + return false; // Loaded pointer escapes. // Remember that this allocation is related to the indirect global. AllocRelatedValues.push_back(Ptr); @@ -360,7 +363,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). 
- for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { const std::vector<CallGraphNode *> &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); @@ -437,9 +440,10 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } // Scan the function bodies for explicit loads or stores. - for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef; + ++i) for (inst_iterator II = inst_begin(SCC[i]->getFunction()), - E = inst_end(SCC[i]->getFunction()); + E = inst_end(SCC[i]->getFunction()); II != E && FunctionEffect != ModRef; ++II) if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) { FunctionEffect |= Ref; @@ -474,8 +478,6 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } } - - /// alias - If one of the pointers is to a global that we are tracking, and the /// other is some random pointer, we know there cannot be an alias, because the /// address of the global isn't taken. @@ -492,8 +494,10 @@ AliasResult GlobalsModRef::alias(const MemoryLocation &LocA, if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr; + if (GV1 && !NonAddressTakenGlobals.count(GV1)) + GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) + GV2 = nullptr; // If the two pointers are derived from two different non-addr-taken // globals, or if one is and the other isn't, we know these can't alias. @@ -554,7 +558,6 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); } - //===----------------------------------------------------------------------===// // Methods to update the analysis as a result of the client transformation. // @@ -565,9 +568,10 @@ void GlobalsModRef::deleteValue(Value *V) { // any AllocRelatedValues for it. if (IndirectGlobals.erase(GV)) { // Remove any entries in AllocsForIndirectGlobals for this global. - for (std::map<const Value*, const GlobalValue*>::iterator - I = AllocsForIndirectGlobals.begin(), - E = AllocsForIndirectGlobals.end(); I != E; ) { + for (std::map<const Value *, const GlobalValue *>::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); + I != E;) { if (I->second == GV) { AllocsForIndirectGlobals.erase(I++); } else { @@ -585,16 +589,12 @@ void GlobalsModRef::deleteValue(Value *V) { AliasAnalysis::deleteValue(V); } -void GlobalsModRef::copyValue(Value *From, Value *To) { - AliasAnalysis::copyValue(From, To); -} - void GlobalsModRef::addEscapingUse(Use &U) { // For the purposes of this analysis, it is conservatively correct to treat // a newly escaping value equivalently to a deleted one. We could perhaps // be more precise by processing the new use and attempting to update our // saved analysis results to accommodate it. deleteValue(U); - + AliasAnalysis::addEscapingUse(U); } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 349b9cac2c2d..c0d2e375cb04 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. 
return false; - case Intrinsic::frameescape: + case Intrinsic::localescape: HasFrameEscape = true; return false; } @@ -1424,11 +1424,11 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; - // Disallow inlining functions that call @llvm.frameescape. Doing this + // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. if (CS.getCalledFunction() && CS.getCalledFunction()->getIntrinsicID() == - llvm::Intrinsic::frameescape) + llvm::Intrinsic::localescape) return false; } } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index b88b2496b875..926787d3be91 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -12,8 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -34,6 +36,7 @@ using namespace llvm; char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) @@ -137,6 +140,11 @@ bool IVUsers::AddUsersImpl(Instruction *I, if (Width > 64 || !DL.isLegalInteger(Width)) return false; + // Don't attempt to promote ephemeral values to indvars. They will be removed + // later anyway. + if (EphValues.count(I)) + return false; + // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); @@ -244,6 +252,7 @@ IVUsers::IVUsers() } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); @@ -253,10 +262,16 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { L = l; + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( + *L->getHeader()->getParent()); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); + // Collect ephemeral values so that AddUsersIfInteresting skips them. + EphValues.clear(); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. 
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 12e406bb1a2d..fa42b48b6cdb 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -3046,7 +3047,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + FastMathFlags FMF, const Query &Q, + unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); @@ -3065,6 +3067,14 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(GetCompareTy(LHS), 1); + // UNO/ORD predicates can be trivially folded if NaNs are ignored. + if (FMF.noNaNs()) { + if (Pred == FCmpInst::FCMP_UNO) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_ORD) + return ConstantInt::get(GetCompareTy(LHS), 1); + } + // fcmp pred x, undef and fcmp pred undef, x // fold to true if unordered, false if ordered if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { @@ -3151,12 +3161,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, + FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is @@ -3511,6 +3521,82 @@ Value *llvm::SimplifyInsertValueInst( RecursionLimit); } +/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we +/// can fold the result. If not, this returns null. 
+static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const Query &, unsigned) { + if (auto *CAgg = dyn_cast<Constant>(Agg)) + return ConstantFoldExtractValueInstruction(CAgg, Idxs); + + // extractvalue x, (insertvalue y, elt, n), n -> elt + unsigned NumIdxs = Idxs.size(); + for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr; + IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) { + ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices(); + unsigned NumInsertValueIdxs = InsertValueIdxs.size(); + unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs); + if (InsertValueIdxs.slice(0, NumCommonIdxs) == + Idxs.slice(0, NumCommonIdxs)) { + if (NumIdxs == NumInsertValueIdxs) + return IVI->getInsertedValueOperand(); + break; + } + } + + return nullptr; +} + +Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, + AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + +/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we +/// can fold the result. If not, this returns null. +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, + unsigned) { + if (auto *CVec = dyn_cast<Constant>(Vec)) { + if (auto *CIdx = dyn_cast<Constant>(Idx)) + return ConstantFoldExtractElementInstruction(CVec, CIdx); + + // The index is not relevant if our vector is a splat. + if (auto *Splat = CVec->getSplatValue()) + return Splat; + + if (isa<UndefValue>(Vec)) + return UndefValue::get(Vec->getType()->getVectorElementType()); + } + + // If extracting a specified index from the vector, see if we can recursively + // find a previously computed scalar that was inserted into the vector. + if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) { + unsigned IndexVal = IdxC->getZExtValue(); + unsigned VectorWidth = Vec->getType()->getVectorNumElements(); + + // If this is extracting an invalid index, turn this into undef, to avoid + // crashing the code below. + if (IndexVal >= VectorWidth) + return UndefValue::get(Vec->getType()->getVectorElementType()); + + if (Value *Elt = findScalarElement(Vec, IndexVal)) + return Elt; + } + + return nullptr; +} + +Value *llvm::SimplifyExtractElementInst( + Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. 
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node @@ -3670,7 +3756,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -3900,9 +3986,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::FCmp: - Result = - SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), @@ -3920,6 +4006,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, IV->getIndices(), DL, TLI, DT, AC, I); break; } + case Instruction::ExtractValue: { + auto *EVI = cast<ExtractValueInst>(I); + Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), + EVI->getIndices(), DL, TLI, DT, AC, I); + break; + } + case Instruction::ExtractElement: { + auto *EEI = cast<ExtractElementInst>(I); + Result = SimplifyExtractElementInst( + EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); + break; + } case Instruction::PHI: Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I)); break; diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b11cd7e84a6d..becbae4c5b50 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); unsigned VectorizerParams::RuntimeMemoryCheckThreshold; +/// \brief The maximum iterations used to merge memory checks +static cl::opt<unsigned> MemoryCheckMergeThreshold( + "memory-check-merge-threshold", cl::Hidden, + cl::desc("Maximum number of comparisons done when trying to merge " + "runtime memory checks. (default = 100)"), + cl::init(100)); + /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; @@ -112,35 +119,182 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, return SE->getSCEV(Ptr); } -void LoopAccessInfo::RuntimePointerCheck::insert( - ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, - unsigned ASId, const ValueToValueMap &Strides) { +void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, + unsigned DepSetId, unsigned ASId, + const ValueToValueMap &Strides) { // Get the stride replaced scev. 
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.push_back(Ptr); - Starts.push_back(AR->getStart()); - Ends.push_back(ScEnd); - IsWritePtr.push_back(WritePtr); - DependencySetId.push_back(DepSetId); - AliasSetId.push_back(ASId); + Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId, + Sc); +} + +bool RuntimePointerChecking::needsChecking( + const CheckingPtrGroup &M, const CheckingPtrGroup &N, + const SmallVectorImpl<int> *PtrPartition) const { + for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) + for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) + if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) + return true; + return false; +} + +/// Compare \p I and \p J and return the minimum. +/// Return nullptr in case we couldn't find an answer. +static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, + ScalarEvolution *SE) { + const SCEV *Diff = SE->getMinusSCEV(J, I); + const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff); + + if (!C) + return nullptr; + if (C->getValue()->isNegative()) + return J; + return I; +} + +bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { + const SCEV *Start = RtCheck.Pointers[Index].Start; + const SCEV *End = RtCheck.Pointers[Index].End; + + // Compare the starts and ends with the known minimum and maximum + // of this set. We need to know how we compare against the min/max + // of the set in order to be able to emit memchecks. + const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); + if (!Min0) + return false; + + const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); + if (!Min1) + return false; + + // Update the low bound expression if we've found a new min value. + if (Min0 == Start) + Low = Start; + + // Update the high bound expression if we've found a new max value. + if (Min1 != End) + High = End; + + Members.push_back(Index); + return true; } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( +void RuntimePointerChecking::groupChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { + // We build the groups from dependency candidates equivalence classes + // because: + // - We know that pointers in the same equivalence class share + // the same underlying object and therefore there is a chance + // that we can compare pointers + // - We wouldn't be able to merge two pointers for which we need + // to emit a memcheck. The classes in DepCands are already + // conveniently built such that no two pointers in the same + // class need checking against each other. + + // We use the following (greedy) algorithm to construct the groups + // For every pointer in the equivalence class: + // For each existing group: + // - if the difference between this pointer and the min/max bounds + // of the group is a constant, then make the pointer part of the + // group and update the min/max bounds of that group as required. + + CheckingGroups.clear(); + + // If we don't have the dependency partitions, construct a new + // checking pointer group for each pointer. 
+ if (!UseDependencies) { + for (unsigned I = 0; I < Pointers.size(); ++I) + CheckingGroups.push_back(CheckingPtrGroup(I, *this)); + return; + } + + unsigned TotalComparisons = 0; + + DenseMap<Value *, unsigned> PositionMap; + for (unsigned Index = 0; Index < Pointers.size(); ++Index) + PositionMap[Pointers[Index].PointerValue] = Index; + + // We need to keep track of what pointers we've already seen so we + // don't process them twice. + SmallSet<unsigned, 2> Seen; + + // Go through all equivalence classes, get the the "pointer check groups" + // and add them to the overall solution. We use the order in which accesses + // appear in 'Pointers' to enforce determinism. + for (unsigned I = 0; I < Pointers.size(); ++I) { + // We've seen this pointer before, and therefore already processed + // its equivalence class. + if (Seen.count(I)) + continue; + + MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, + Pointers[I].IsWritePtr); + + SmallVector<CheckingPtrGroup, 2> Groups; + auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); + + // Because DepCands is constructed by visiting accesses in the order in + // which they appear in alias sets (which is deterministic) and the + // iteration order within an equivalence class member is only dependent on + // the order in which unions and insertions are performed on the + // equivalence class, the iteration order is deterministic. + for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); + MI != ME; ++MI) { + unsigned Pointer = PositionMap[MI->getPointer()]; + bool Merged = false; + // Mark this pointer as seen. + Seen.insert(Pointer); + + // Go through all the existing sets and see if we can find one + // which can include this pointer. + for (CheckingPtrGroup &Group : Groups) { + // Don't perform more than a certain amount of comparisons. + // This should limit the cost of grouping the pointers to something + // reasonable. If we do end up hitting this threshold, the algorithm + // will create separate groups for all remaining pointers. + if (TotalComparisons > MemoryCheckMergeThreshold) + break; + + TotalComparisons++; + + if (Group.addPointer(Pointer)) { + Merged = true; + break; + } + } + + if (!Merged) + // We couldn't add this pointer to any existing set or the threshold + // for the number of comparisons has been reached. Create a new group + // to hold the current pointer. + Groups.push_back(CheckingPtrGroup(Pointer, *this)); + } + + // We've computed the grouped checks for this partition. + // Save the results and continue with the next one. + std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups)); + } +} + +bool RuntimePointerChecking::needsChecking( unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { + const PointerInfo &PointerI = Pointers[I]; + const PointerInfo &PointerJ = Pointers[J]; + // No need to check if two readonly pointers intersect. - if (!IsWritePtr[I] && !IsWritePtr[J]) + if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) return false; // Only need to check pointers between two different dependency sets. - if (DependencySetId[I] == DependencySetId[J]) + if (PointerI.DependencySetId == PointerJ.DependencySetId) return false; // Only need to check pointers in the same alias set. - if (AliasSetId[I] != AliasSetId[J]) + if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; // If PtrPartition is set omit checks between pointers of the same partition. 
@@ -153,45 +307,75 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking( return true; } -void LoopAccessInfo::RuntimePointerCheck::print( +void RuntimePointerChecking::print( raw_ostream &OS, unsigned Depth, const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); - if (NumPointers == 0) - return; OS.indent(Depth) << "Run-time memory checks:\n"; + unsigned N = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) { - OS.indent(Depth) << N++ << ":\n"; - OS.indent(Depth + 2) << *Pointers[I]; - if (PtrPartition) - OS << " (Partition: " << (*PtrPartition)[I] << ")"; - OS << "\n"; - OS.indent(Depth + 2) << *Pointers[J]; - if (PtrPartition) - OS << " (Partition: " << (*PtrPartition)[J] << ")"; - OS << "\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) + if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { + OS.indent(Depth) << "Check " << N++ << ":\n"; + OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; + + for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { + OS.indent(Depth + 2) + << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; + if (PtrPartition) + OS << " (Partition: " + << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" + << "\n"; + } + + OS.indent(Depth + 2) << "Against group " << J << ":\n"; + + for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { + OS.indent(Depth + 2) + << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; + if (PtrPartition) + OS << " (Partition: " + << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" + << "\n"; + } } + + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + OS.indent(Depth + 2) << "Group " << I << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low + << " High: " << *CheckingGroups[I].High << ")\n"; + for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " + << *Pointers[CheckingGroups[I].Members[J]].Expr + << "\n"; + } + } } -unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( +unsigned RuntimePointerChecking::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); + + unsigned NumPartitions = CheckingGroups.size(); unsigned CheckCount = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) + for (unsigned I = 0; I < NumPartitions; ++I) + for (unsigned J = I + 1; J < NumPartitions; ++J) + if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) CheckCount++; return CheckCount; } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +bool RuntimePointerChecking::needsAnyChecking( const SmallVectorImpl<int> *PtrPartition) const { - return getNumberOfChecks(PtrPartition) != 0; + unsigned NumPointers = Pointers.size(); + + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J, PtrPartition)) + return true; + return false; } namespace { @@ -207,7 +391,8 @@ public: AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {} + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), + IsRTCheckAnalysisNeeded(false) {} /// \brief 
Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -226,11 +411,12 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. Returns true when we have 0 pointers - /// (a check on 0 pointers for non-intersection will always return true). - bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, - const ValueToValueMap &Strides, + /// non-intersection. + /// + /// Returns true if we need no check or if we do and we can generate them + /// (i.e. the pointers have computable bounds). + bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, + Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -239,8 +425,11 @@ public: processMemAccesses(); } - bool isRTCheckNeeded() { return IsRTCheckNeeded; } - + /// \brief Initial processing of memory accesses determined that we need to + /// perform dependency checking. + /// + /// Note that this can later be cleared if we retry memcheck analysis without + /// dependency checking (i.e. ShouldRetryWithRuntimeCheck). bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } /// We decided that no dependence analysis would be used. Reset the state. @@ -255,7 +444,7 @@ private: typedef SetVector<MemAccessInfo> PtrAccessSet; /// \brief Go over all memory access and check whether runtime pointer checks - /// are needed /// and build sets of dependency check candidates. + /// are needed and build sets of dependency check candidates. void processMemAccesses(); /// Set of all accesses. @@ -280,7 +469,14 @@ private: /// dependence check. MemoryDepChecker::DepCandidates &DepCands; - bool IsRTCheckNeeded; + /// \brief Initial processing of memory accesses determined that we may need + /// to add memchecks. Perform the analysis to determine the necessary checks. + /// + /// Note that, this is different from isDependencyCheckNeeded. When we retry + /// memcheck analysis without dependency checking + /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared + /// while this remains set if we have potentially dependent accesses. + bool IsRTCheckAnalysisNeeded; }; } // end anonymous namespace @@ -296,16 +492,16 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, - ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, - bool ShouldCheckStride) { +bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, + ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &StridesMap, + bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; - NeedRTCheck = false; - if (!IsRTCheckNeeded) return true; + bool NeedRTCheck = false; + if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); @@ -313,6 +509,9 @@ bool AccessAnalysis::canCheckPtrAtRT( // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { + int NumReadPtrChecks = 0; + int NumWritePtrChecks = 0; + // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. 
unsigned RunningDepId = 1; @@ -323,6 +522,11 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); + if (IsWrite) + ++NumWritePtrChecks; + else + ++NumReadPtrChecks; + if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -341,7 +545,7 @@ bool AccessAnalysis::canCheckPtrAtRT( // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -350,15 +554,21 @@ bool AccessAnalysis::canCheckPtrAtRT( } } + // If we have at least two writes or one write and a read then we need to + // check them. But there is no need to checks if there is only one + // dependence set for this alias set. + // + // Note that this function computes CanDoRT and NeedRTCheck independently. + // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer + // for which we couldn't find the bounds but we don't actually need to emit + // any checks so it does not matter. + if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2)) + NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && + NumWritePtrChecks >= 1)); + ++ASId; } - // We need a runtime check if there are any accesses that need checking. - // However, some accesses cannot be checked (for example because we - // can't determine their bounds). In these cases we would need a check - // but wouldn't be able to add it. - NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); - // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -368,14 +578,15 @@ bool AccessAnalysis::canCheckPtrAtRT( for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i + 1; j < NumPointers; ++j) { // Only need to check pointers between two different dependency sets. - if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + if (RtCheck.Pointers[i].DependencySetId == + RtCheck.Pointers[j].DependencySetId) continue; // Only need to check pointers in the same alias set. - if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) + if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; - Value *PtrI = RtCheck.Pointers[i]; - Value *PtrJ = RtCheck.Pointers[j]; + Value *PtrI = RtCheck.Pointers[i].PointerValue; + Value *PtrJ = RtCheck.Pointers[j].PointerValue; unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); @@ -387,7 +598,18 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - return CanDoRT; + if (NeedRTCheck && CanDoRT) + RtCheck.groupChecks(DepCands, IsDepCheckNeeded); + + DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); + + RtCheck.Need = NeedRTCheck; + + bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT; + if (!CanDoRTIfNeeded) + RtCheck.reset(); + return CanDoRTIfNeeded; } void AccessAnalysis::processMemAccesses() { @@ -470,7 +692,7 @@ void AccessAnalysis::processMemAccesses() { // catch "a[i] = a[i] + " without having to do a dependence check). 
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { CheckDeps.insert(Access); - IsRTCheckNeeded = true; + IsRTCheckAnalysisNeeded = true; } if (IsWrite) @@ -600,7 +822,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, // Check the step is constant. const SCEV *Step = AR->getStepRecurrence(*SE); - // Calculate the pointer stride and check if it is consecutive. + // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) { DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << @@ -805,11 +1027,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"); - // Need consecutive accesses. We don't want to vectorize + // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ - DEBUG(dbgs() << "Non-consecutive pointer access\n"); + DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } @@ -859,8 +1081,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Stride = std::abs(StrideAPtr); if (Stride > 1 && - areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) { + DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; + } // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? @@ -1098,8 +1322,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { unsigned NumReads = 0; unsigned NumReadWrites = 0; - PtrRtCheck.Pointers.clear(); - PtrRtCheck.Need = false; + PtrRtChecking.Pointers.clear(); + PtrRtChecking.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); @@ -1258,28 +1482,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool NeedRTCheck; - bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, - NeedRTCheck, SE, - TheLoop, Strides); - - DEBUG(dbgs() << "LAA: We need to do " - << PtrRtCheck.getNumberOfChecks(nullptr) - << " pointer comparisons.\n"); - - // Check that we found the bounds for the pointer. - if (CanDoRT) - DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); - else if (NeedRTCheck) { + bool CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); - DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << - "the array bounds.\n"); - PtrRtCheck.reset(); + DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " + << "the array bounds.\n"); CanVecMem = false; return; } - PtrRtCheck.Need = NeedRTCheck; + DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { @@ -1290,23 +1503,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); - NeedRTCheck = true; // Clear the dependency checks. 
We assume they are not needed. Accesses.resetDepChecks(DepChecker); - PtrRtCheck.reset(); - PtrRtCheck.Need = true; + PtrRtChecking.reset(); + PtrRtChecking.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, - TheLoop, Strides, true); + CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); // Check that we found the bounds for the pointer. - if (NeedRTCheck && !CanDoRT) { + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); - PtrRtCheck.reset(); CanVecMem = false; return; } @@ -1317,8 +1528,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (CanVecMem) DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (NeedRTCheck ? "" : " don't") - << " need a runtime memory check.\n"); + << (PtrRtChecking.Need ? "" : " don't") + << " need runtime memory checks.\n"); else { emitAnalysis(LoopAccessReport() << "unsafe dependent memory operations in loop"); @@ -1357,35 +1568,38 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - if (!PtrRtCheck.Need) + if (!PtrRtChecking.Need) return std::make_pair(nullptr, nullptr); - unsigned NumPointers = PtrRtCheck.Pointers.size(); - SmallVector<TrackingVH<Value> , 2> Starts; - SmallVector<TrackingVH<Value> , 2> Ends; + SmallVector<TrackingVH<Value>, 2> Starts; + SmallVector<TrackingVH<Value>, 2> Ends; LLVMContext &Ctx = Loc->getContext(); SCEVExpander Exp(*SE, DL, "induction"); Instruction *FirstInst = nullptr; - for (unsigned i = 0; i < NumPointers; ++i) { - Value *Ptr = PtrRtCheck.Pointers[i]; + for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { + const RuntimePointerChecking::CheckingPtrGroup &CG = + PtrRtChecking.CheckingGroups[i]; + Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; const SCEV *Sc = SE->getSCEV(Ptr); if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << - *Ptr <<"\n"); + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); Starts.push_back(Ptr); Ends.push_back(Ptr); } else { - DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n'); unsigned AS = Ptr->getType()->getPointerAddressSpace(); // Use this type for pointer arithmetic. Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; - Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc); - Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc); + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); Starts.push_back(Start); Ends.push_back(End); } @@ -1394,9 +1608,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. 
Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < NumPointers; ++i) { - for (unsigned j = i+1; j < NumPointers; ++j) { - if (!PtrRtCheck.needsChecking(i, j, PtrPartition)) + for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { + for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) { + const RuntimePointerChecking::CheckingPtrGroup &CGI = + PtrRtChecking.CheckingGroups[i]; + const RuntimePointerChecking::CheckingPtrGroup &CGJ = + PtrRtChecking.CheckingGroups[j]; + + if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition)) continue; unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); @@ -1447,7 +1666,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), + : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1457,7 +1676,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.Need) + if (PtrRtChecking.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; else OS.indent(Depth) << "Memory dependences are safe\n"; @@ -1476,7 +1695,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. - PtrRtCheck.print(OS, Depth); + PtrRtChecking.print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Store to invariant address was " diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 7617622b9ab6..322a9a80de4c 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -72,7 +72,6 @@ namespace { } void deleteValue(Value *V) override {} - void copyValue(Value *From, Value *To) override {} void addEscapingUse(Use &U) override {} /// getAdjustedAnalysisPointer - This method is used when a pass implements diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 520d1e5ef87d..7d1c3fbef68a 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -28,12 +28,12 @@ namespace { /// /// This is used when no target specific information is available. struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { - explicit NoTTIImpl(const DataLayout *DL) + explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {} }; } -TargetTransformInfo::TargetTransformInfo(const DataLayout *DL) +TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {} TargetTransformInfo::~TargetTransformInfo() {} @@ -304,7 +304,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { char TargetIRAnalysis::PassID; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { - return Result(&F.getParent()->getDataLayout()); + return Result(F.getParent()->getDataLayout()); } // Register the basic pass. 
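As an aside for illustration (a hypothetical helper, not part of the change set): with TargetTransformInfo now taking the DataLayout by reference rather than by pointer, the fallback analysis can be built directly from a function's module, mirroring getDefaultTTI above, and the old "no DataLayout" case disappears.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// Hypothetical helper shown only to illustrate the new by-reference API.
static llvm::TargetTransformInfo makeNoOpTTI(llvm::Function &F) {
  // Module::getDataLayout() returns a reference, so there is no null
  // pointer left to check for.
  return llvm::TargetTransformInfo(F.getParent()->getDataLayout());
}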
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c45005f343d3..fa0d7798cae9 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1464,7 +1464,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If the object is defined in the current Module, we'll be giving // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. - if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + if (GVar->isStrongDefinitionForLinker()) Align = DL.getPreferredAlignment(GVar); else Align = DL.getABITypeAlignment(ObjectType); diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 96fddd103cc5..67f68dc8391e 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -11,7 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Value.h" /// \brief Identify if the intrinsic is trivially vectorizable. /// This method returns true if the intrinsic's argument types are all @@ -211,3 +217,195 @@ llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI, return Intrinsic::not_intrinsic; } + +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { + const DataLayout &DL = Gep->getModule()->getDataLayout(); + unsigned LastOperand = Gep->getNumOperands() - 1; + unsigned GEPAllocSize = DL.getTypeAllocSize( + cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && + match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + +/// \brief If the argument is a GEP, then returns the operand identified by +/// getGEPInductionOperand. However, if there is some other non-loop-invariant +/// operand, it returns that instead. +llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE, + Loop *Lp) { + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP) + return Ptr; + + unsigned InductionOperand = getGEPInductionOperand(GEP); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) + return Ptr; + return GEP->getOperand(InductionOperand); +} + +/// \brief If a value has only one user that is a CastInst, return it. 
+llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) { + llvm::Value *UniqueCast = nullptr; + for (User *U : Ptr->users()) { + CastInst *CI = dyn_cast<CastInst>(U); + if (CI && CI->getType() == Ty) { + if (!UniqueCast) + UniqueCast = CI; + else + return nullptr; + } + } + return UniqueCast; +} + +/// \brief Get the stride of a pointer access in a loop. Looks for symbolic +/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. +llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE, + Loop *Lp) { + const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); + if (!PtrTy || PtrTy->isAggregateType()) + return nullptr; + + // Try to remove a gep instruction to make the pointer (actually index at this + // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the + // pointer, otherwise, we are analyzing the index. + llvm::Value *OrigPtr = Ptr; + + // The size of the pointer access. + int64_t PtrAccessSize = 1; + + Ptr = stripGetElementPtr(Ptr, SE, Lp); + const SCEV *V = SE->getSCEV(Ptr); + + if (Ptr != OrigPtr) + // Strip off casts. + while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) + V = C->getOperand(); + + const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); + if (!S) + return nullptr; + + V = S->getStepRecurrence(*SE); + if (!V) + return nullptr; + + // Strip off the size of access multiplication if we are still analyzing the + // pointer. + if (OrigPtr == Ptr) { + const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); + DL.getTypeAllocSize(PtrTy->getElementType()); + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { + if (M->getOperand(0)->getSCEVType() != scConstant) + return nullptr; + + const APInt &APStepVal = + cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue(); + + // Huge step value - give up. + if (APStepVal.getBitWidth() > 64) + return nullptr; + + int64_t StepVal = APStepVal.getSExtValue(); + if (PtrAccessSize != StepVal) + return nullptr; + V = M->getOperand(1); + } + } + + // Strip off casts. + Type *StripedOffRecurrenceCast = nullptr; + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) { + StripedOffRecurrenceCast = C->getType(); + V = C->getOperand(); + } + + // Look for the loop invariant symbolic value. + const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); + if (!U) + return nullptr; + + llvm::Value *Stride = U->getValue(); + if (!Lp->isLoopInvariant(Stride)) + return nullptr; + + // If we have stripped off the recurrence cast we have to make sure that we + // return the value that is used in this loop so that we can replace it later. + if (StripedOffRecurrenceCast) + Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); + + return Stride; +} + +/// \brief Given a vector and an element number, see if the scalar value is +/// already around as a register, for example if it were inserted then extracted +/// from the vector. +llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) { + assert(V->getType()->isVectorTy() && "Not looking at a vector?"); + VectorType *VTy = cast<VectorType>(V->getType()); + unsigned Width = VTy->getNumElements(); + if (EltNo >= Width) // Out of range access. + return UndefValue::get(VTy->getElementType()); + + if (Constant *C = dyn_cast<Constant>(V)) + return C->getAggregateElement(EltNo); + + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { + // If this is an insert to a variable element, we don't know what it is. 
+ if (!isa<ConstantInt>(III->getOperand(2))) + return nullptr; + unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); + + // If this is an insert to the element we are looking for, return the + // inserted value. + if (EltNo == IIElt) + return III->getOperand(1); + + // Otherwise, the insertelement doesn't modify the value, recurse on its + // vector input. + return findScalarElement(III->getOperand(0), EltNo); + } + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { + unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); + int InEl = SVI->getMaskValue(EltNo); + if (InEl < 0) + return UndefValue::get(VTy->getElementType()); + if (InEl < (int)LHSWidth) + return findScalarElement(SVI->getOperand(0), InEl); + return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); + } + + // Extract a value from a vector add operation with a constant zero. + Value *Val = nullptr; Constant *Con = nullptr; + if (match(V, + llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val), + llvm::PatternMatch::m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return findScalarElement(Val, EltNo); + } + + // Otherwise, we don't know. + return nullptr; +} diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 88f359d4fd5c..5c4bab734b2b 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -593,6 +593,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(attributes); KEYWORD(alwaysinline); + KEYWORD(argmemonly); KEYWORD(builtin); KEYWORD(byval); KEYWORD(inalloca); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index b3c7fa087d40..1c6e7bd18d0e 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -946,35 +946,42 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, B.addStackAlignmentAttr(Alignment); continue; } - case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; - case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; - case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; - case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; - case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; - case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; - case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; - case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; - case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; - case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; - case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; - case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; - case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; - case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; - case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; - case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; - case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; - case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; - case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; - case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; - case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; - case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; - case lltok::kw_sspreq: 
B.addAttribute(Attribute::StackProtectReq); break; - case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; - case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break; - case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break; - case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break; - case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break; - case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; + case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_argmemonly: B.addAttribute(Attribute::ArgMemOnly); break; + case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; + case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; + case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; + case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; + case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; + case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; + case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; + case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; + case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; + case lltok::kw_noimplicitfloat: + B.addAttribute(Attribute::NoImplicitFloat); break; + case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; + case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; + case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; + case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; + case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; + case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; + case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; + case lltok::kw_returns_twice: + B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; + case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; + case lltok::kw_sspstrong: + B.addAttribute(Attribute::StackProtectStrong); break; + case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break; + case lltok::kw_sanitize_address: + B.addAttribute(Attribute::SanitizeAddress); break; + case lltok::kw_sanitize_thread: + B.addAttribute(Attribute::SanitizeThread); break; + case lltok::kw_sanitize_memory: + B.addAttribute(Attribute::SanitizeMemory); break; + case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; // Error handling. 
case lltok::kw_inreg: @@ -1258,6 +1265,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_argmemonly: case lltok::kw_builtin: case lltok::kw_inlinehint: case lltok::kw_jumptable: @@ -1334,6 +1342,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_argmemonly: case lltok::kw_builtin: case lltok::kw_cold: case lltok::kw_inlinehint: @@ -2873,8 +2882,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (ValTy->isVectorTy() != BaseType->isVectorTy()) return Error(ID.Loc, "getelementptr index type missmatch"); if (ValTy->isVectorTy()) { - unsigned ValNumEl = cast<VectorType>(ValTy)->getNumElements(); - unsigned PtrNumEl = cast<VectorType>(BaseType)->getNumElements(); + unsigned ValNumEl = ValTy->getVectorNumElements(); + unsigned PtrNumEl = BaseType->getVectorNumElements(); if (ValNumEl != PtrNumEl) return Error( ID.Loc, @@ -4534,8 +4543,17 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_and: case lltok::kw_or: case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal); - case lltok::kw_icmp: - case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal); + case lltok::kw_icmp: return ParseCompare(Inst, PFS, KeywordVal); + case lltok::kw_fcmp: { + FastMathFlags FMF = EatFastMathFlagsIfPresent(); + int Res = ParseCompare(Inst, PFS, KeywordVal); + if (Res != 0) + return Res; + if (FMF.any()) + Inst->setFastMathFlags(FMF); + return 0; + } + // Casts. case lltok::kw_trunc: case lltok::kw_zext: @@ -5572,6 +5590,11 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { SmallVector<Value*, 16> Indices; bool AteExtraComma = false; + // GEP returns a vector of pointers if at least one of parameters is a vector. + // All vector parameters should have the same vector width. + unsigned GEPWidth = BaseType->isVectorTy() ? 
+ BaseType->getVectorNumElements() : 0; + while (EatIfPresent(lltok::comma)) { if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; @@ -5580,14 +5603,13 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; if (!Val->getType()->getScalarType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); - if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy()) - return Error(EltLoc, "getelementptr index type missmatch"); + if (Val->getType()->isVectorTy()) { - unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements(); - unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements(); - if (ValNumEl != PtrNumEl) + unsigned ValNumEl = Val->getType()->getVectorNumElements(); + if (GEPWidth && GEPWidth != ValNumEl) return Error(EltLoc, "getelementptr vector index has a wrong number of elements"); + GEPWidth = ValNumEl; } Indices.push_back(Val); } diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 2487d1208133..691f085f0c9f 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -100,6 +100,7 @@ namespace lltok { // Attributes: kw_attributes, kw_alwaysinline, + kw_argmemonly, kw_sanitize_address, kw_builtin, kw_byval, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 09f0b689bdc3..c04e8b9f1f37 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -697,6 +697,21 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { } } +static FastMathFlags getDecodedFastMathFlags(unsigned Val) { + FastMathFlags FMF; + if (0 != (Val & FastMathFlags::UnsafeAlgebra)) + FMF.setUnsafeAlgebra(); + if (0 != (Val & FastMathFlags::NoNaNs)) + FMF.setNoNaNs(); + if (0 != (Val & FastMathFlags::NoInfs)) + FMF.setNoInfs(); + if (0 != (Val & FastMathFlags::NoSignedZeros)) + FMF.setNoSignedZeros(); + if (0 != (Val & FastMathFlags::AllowReciprocal)) + FMF.setAllowReciprocal(); + return FMF; +} + static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) { switch (Val) { case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break; @@ -1075,6 +1090,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; + case bitc::ATTR_KIND_ARGMEMONLY: + return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: @@ -3472,17 +3489,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { if (Record[OpNum] & (1 << bitc::PEO_EXACT)) cast<BinaryOperator>(I)->setIsExact(true); } else if (isa<FPMathOperator>(I)) { - FastMathFlags FMF; - if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra)) - FMF.setUnsafeAlgebra(); - if (0 != (Record[OpNum] & FastMathFlags::NoNaNs)) - FMF.setNoNaNs(); - if (0 != (Record[OpNum] & FastMathFlags::NoInfs)) - FMF.setNoInfs(); - if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros)) - FMF.setNoSignedZeros(); - if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal)) - FMF.setAllowReciprocal(); + FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]); if (FMF.any()) I->setFastMathFlags(FMF); } @@ -3739,14 +3746,25 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - popValue(Record, OpNum, NextValueNo, LHS->getType(), 
RHS) || - OpNum+1 != Record.size()) + popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) + return error("Invalid record"); + + unsigned PredVal = Record[OpNum]; + bool IsFP = LHS->getType()->isFPOrFPVectorTy(); + FastMathFlags FMF; + if (IsFP && Record.size() > OpNum+1) + FMF = getDecodedFastMathFlags(Record[++OpNum]); + + if (OpNum+1 != Record.size()) return error("Invalid record"); if (LHS->getType()->isFPOrFPVectorTy()) - I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS); else - I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS); + + if (FMF.any()) + I->setFastMathFlags(FMF); InstructionList.push_back(I); break; } @@ -4458,14 +4476,11 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { // Upgrade any old intrinsic calls in the function. for (auto &I : UpgradedIntrinsics) { - if (I.first != I.second) { - for (auto UI = I.first->user_begin(), UE = I.first->user_end(); - UI != UE;) { - User *U = *UI; - ++UI; - if (CallInst *CI = dyn_cast<CallInst>(U)) - UpgradeIntrinsicCall(CI, I.second); - } + for (auto UI = I.first->user_begin(), UE = I.first->user_end(); UI != UE;) { + User *U = *UI; + ++UI; + if (CallInst *CI = dyn_cast<CallInst>(U)) + UpgradeIntrinsicCall(CI, I.second); } } @@ -4533,15 +4548,13 @@ std::error_code BitcodeReader::materializeModule(Module *M) { // module is materialized because there could always be another function body // with calls to the old function. for (auto &I : UpgradedIntrinsics) { - if (I.first != I.second) { - for (auto *U : I.first->users()) { - if (CallInst *CI = dyn_cast<CallInst>(U)) - UpgradeIntrinsicCall(CI, I.second); - } - if (!I.first->use_empty()) - I.first->replaceAllUsesWith(I.second); - I.first->eraseFromParent(); + for (auto *U : I.first->users()) { + if (CallInst *CI = dyn_cast<CallInst>(U)) + UpgradeIntrinsicCall(CI, I.second); } + if (!I.first->use_empty()) + I.first->replaceAllUsesWith(I.second); + I.first->eraseFromParent(); } UpgradedIntrinsics.clear(); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 622f7eaf0784..1a70ba5ac127 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -162,6 +162,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALIGNMENT; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; + case Attribute::ArgMemOnly: + return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: @@ -1759,13 +1761,17 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ICmp: - case Instruction::FCmp: + case Instruction::FCmp: { // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; PushValueAndType(I.getOperand(0), InstID, Vals, VE); pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(cast<CmpInst>(I).getPredicate()); + uint64_t Flags = GetOptimizationFlags(&I); + if (Flags != 0) + Vals.push_back(Flags); break; + } case Instruction::Ret: { diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 3224fac25cb4..98d4c8afc7b9 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -81,27 +81,27 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory 
offsets of each of the individual values. /// -void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); EI != EE; ++EI) - ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets, StartingOffset + SL->getElementOffset(EI - EB)); return; } // Given an array type, recursively traverse the elements. if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets, StartingOffset + i * EltSize); return; } @@ -109,7 +109,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, if (Ty->isVoidTy()) return; // Base case: we can get an EVT for this LLVM IR type. - ValueVTs.push_back(TLI.getValueType(Ty)); + ValueVTs.push_back(TLI.getValueType(DL, Ty)); if (Offsets) Offsets->push_back(StartingOffset); } @@ -233,7 +233,8 @@ static bool isNoopBitcast(Type *T1, Type *T2, static const Value *getNoopInput(const Value *V, SmallVectorImpl<unsigned> &ValLoc, unsigned &DataBits, - const TargetLoweringBase &TLI) { + const TargetLoweringBase &TLI, + const DataLayout &DL) { while (true) { // Try to look through V1; if V1 is not an instruction, it can't be looked // through. @@ -255,16 +256,16 @@ static const Value *getNoopInput(const Value *V, // Make sure this isn't a truncating or extending cast. We could // support this eventually, but don't bother for now. if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) + DL.getPointerSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) NoopInput = Op; } else if (isa<PtrToIntInst>(I)) { // Look through ptrtoint. // Make sure this isn't a truncating or extending cast. We could // support this eventually, but don't bother for now. if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) + DL.getPointerSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) NoopInput = Op; } else if (isa<TruncInst>(I) && TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { @@ -331,14 +332,15 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, SmallVectorImpl<unsigned> &RetIndices, SmallVectorImpl<unsigned> &CallIndices, bool AllowDifferingSizes, - const TargetLoweringBase &TLI) { + const TargetLoweringBase &TLI, + const DataLayout &DL) { // Trace the sub-value needed by the return value as far back up the graph as // possible, in the hope that it will intersect with the value produced by the // call. In the simple case with no "returned" attribute, the hope is actually // that we end up back at the tail call instruction itself. 
unsigned BitsRequired = UINT_MAX; - RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI); + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL); // If this slot in the value returned is undef, it doesn't matter what the // call puts there, it'll be fine. @@ -350,7 +352,7 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, // a "returned" attribute, the search will be blocked immediately and the loop // a Noop. unsigned BitsProvided = UINT_MAX; - CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI); + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL); // There's no hope if we can't actually trace them to (the same part of!) the // same value. @@ -606,7 +608,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, - AllowDifferingSizes, TLI)) + AllowDifferingSizes, TLI, + F->getParent()->getDataLayout())) return false; CallEmpty = !nextRealType(CallSubTypes, CallPath); diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 4cb460a7bbfc..0bad7954b980 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -69,24 +69,32 @@ void ARMException::beginFunction(const MachineFunction *MF) { /// void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); + const Function *F = MF->getFunction(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + assert(!MMI->getPersonality() || Per == MMI->getPersonality()); + bool forceEmitPersonality = + F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F->needsUnwindTableEntry(); + bool shouldEmitPersonality = forceEmitPersonality || + !MMI->getLandingPads().empty(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && - MMI->getLandingPads().empty()) + !shouldEmitPersonality) ATS.emitCantUnwind(); - else { - if (!MMI->getLandingPads().empty()) { - // Emit references to personality. - if (const Function *Personality = MMI->getPersonality()) { - MCSymbol *PerSym = Asm->getSymbol(Personality); - Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); - ATS.emitPersonality(PerSym); - } - - // Emit .handlerdata directive. - ATS.emitHandlerData(); - - // Emit actual exception table - emitExceptionTable(); + else if (shouldEmitPersonality) { + // Emit references to personality. + if (Per) { + MCSymbol *PerSym = Asm->getSymbol(Per); + Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); + ATS.emitPersonality(PerSym); } + + // Emit .handlerdata directive. 
+ ATS.emitHandlerData(); + + // Emit actual exception table + emitExceptionTable(); } if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 8a7e9f991611..125047e7bbb5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -820,7 +819,7 @@ void AsmPrinter::EmitFunctionBody() { emitCFIInstruction(MI); break; - case TargetOpcode::FRAME_ALLOC: + case TargetOpcode::LOCAL_ESCAPE: emitFrameAlloc(MI); break; @@ -1024,7 +1023,7 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit visibility info for declarations for (const Function &F : M) { - if (!F.isDeclaration()) + if (!F.isDeclarationForLinker()) continue; GlobalValue::VisibilityTypes V = F.getVisibility(); if (V == GlobalValue::DefaultVisibility) diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 0bc873e326be..2c212c7ecee1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -89,6 +89,7 @@ void DwarfCFIException::endModule() { void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + const Function *F = MF->getFunction(); // If any landing pads survive, we need an EH table. bool hasLandingPads = !MMI->getLandingPads().empty(); @@ -104,10 +105,24 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonality(); - - shouldEmitPersonality = hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per; + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + assert(!MMI->getPersonality() || Per == MMI->getPersonality()); + + // Emit a personality function even when there are no landing pads + bool forceEmitPersonality = + // ...if a personality function is explicitly specified + F->hasPersonalityFn() && + // ... and it's not known to be a noop in the absence of invokes + !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + // ... 
and we're not explicitly asked not to emit it + F->needsUnwindTableEntry(); + + shouldEmitPersonality = + (forceEmitPersonality || + (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) && + Per; unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && @@ -123,6 +138,11 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality) return; + // If we are forced to emit this personality, make sure to record + // it because it might not appear in any landingpad + if (forceEmitPersonality) + MMI->addPersonality(Per); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 1c3e2aec64ab..01f34c6eb81c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -49,7 +49,7 @@ class DwarfUnit; class MachineModuleInfo; //===----------------------------------------------------------------------===// -/// \brief This class is used to record source line correspondence. +/// This class is used to record source line correspondence. class SrcLineInfo { unsigned Line; // Source line number. unsigned Column; // Source column. @@ -161,7 +161,7 @@ public: return dwarf::DW_TAG_variable; } - /// \brief Return true if DbgVariable is artificial. + /// Return true if DbgVariable is artificial. bool isArtificial() const { if (Var->isArtificial()) return true; @@ -190,149 +190,152 @@ public: const DIType *getType() const; private: - /// resolve - Look in the DwarfDebug map for the MDNode that + /// Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. template <typename T> T *resolve(TypedDINodeRef<T> Ref) const; }; -/// \brief Helper used to pair up a symbol and its DWARF compile unit. +/// Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} const MCSymbol *Sym; DwarfCompileUnit *CU; }; -/// \brief Collects and handles dwarf debug information. +/// Collects and handles dwarf debug information. class DwarfDebug : public AsmPrinterHandler { - // Target of Dwarf emission. + /// Target of Dwarf emission. AsmPrinter *Asm; - // Collected machine module information. + /// Collected machine module information. MachineModuleInfo *MMI; - // All DIEValues are allocated through this allocator. + /// All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // Maps MDNode with its corresponding DwarfCompileUnit. + /// Maps MDNode with its corresponding DwarfCompileUnit. MapVector<const MDNode *, DwarfCompileUnit *> CUMap; - // Maps subprogram MDNode with its corresponding DwarfCompileUnit. + /// Maps subprogram MDNode with its corresponding DwarfCompileUnit. MapVector<const MDNode *, DwarfCompileUnit *> SPMap; - // Maps a CU DIE with its corresponding DwarfCompileUnit. + /// Maps a CU DIE with its corresponding DwarfCompileUnit. DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; - // List of all labels used in aranges generation. + /// List of all labels used in aranges generation. std::vector<SymbolCU> ArangeLabels; - // Size of each symbol emitted (for those symbols that have a specific size). + /// Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; LexicalScopes LScopes; - // Collection of abstract variables. 
+ /// Collection of abstract variables. DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; - // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists - // can refer to them in spite of insertions into this list. + /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists + /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; - // This is a collection of subprogram MDNodes that are processed to - // create DIEs. + /// This is a collection of subprogram MDNodes that are processed to + /// create DIEs. SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; - // Maps instruction with label emitted before instruction. + /// Maps instruction with label emitted before instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; - // Maps instruction with label emitted after instruction. + /// Maps instruction with label emitted after instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - // History of DBG_VALUE and clobber instructions for each user variable. - // Variables are listed in order of appearance. + /// History of DBG_VALUE and clobber instructions for each user + /// variable. Variables are listed in order of appearance. DbgValueHistoryMap DbgValues; - // Previous instruction's location information. This is used to determine - // label location to indicate scope boundries in dwarf debug info. + /// Previous instruction's location information. This is used to + /// determine label location to indicate scope boundries in dwarf + /// debug info. DebugLoc PrevInstLoc; MCSymbol *PrevLabel; - // This location indicates end of function prologue and beginning of function - // body. + /// This location indicates end of function prologue and beginning of + /// function body. DebugLoc PrologEndLoc; - // If nonnull, stores the current machine function we're processing. + /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn; - // If nonnull, stores the current machine instruction we're processing. + /// If nonnull, stores the current machine instruction we're processing. const MachineInstr *CurMI; - // If nonnull, stores the CU in which the previous subprogram was contained. + /// If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; - // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_AT_comp_dir. + /// As an optimization, there is no need to emit an entry in the directory + /// table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Holder for the file specific debug information. + /// Holder for the file specific debug information. DwarfFile InfoHolder; - // Holders for the various debug information flags that we might need to - // have exposed. See accessor functions below for description. + /// Holders for the various debug information flags that we might need to + /// have exposed. See accessor functions below for description. - // Holder for imported entities. + /// Holder for imported entities. typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> ImportedEntityMap; ImportedEntityMap ScopesWithImportedEntities; - // Map from MDNodes for user-defined types to the type units that describe - // them. + /// Map from MDNodes for user-defined types to the type units that + /// describe them. 
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; SmallVector< std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> TypeUnitsUnderConstruction; - // Whether to emit the pubnames/pubtypes sections. + /// Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; - // Whether or not to use AT_ranges for compilation units. + /// Whether or not to use AT_ranges for compilation units. bool HasCURanges; - // Whether we emitted a function into a section other than the default - // text. + /// Whether we emitted a function into a section other than the + /// default text. bool UsedNonDefaultText; - // Whether to use the GNU TLS opcode (instead of the standard opcode). + /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; - // Version of dwarf we're emitting. + /// Version of dwarf we're emitting. unsigned DwarfVersion; - // Maps from a type identifier to the actual MDNode. + /// Maps from a type identifier to the actual MDNode. DITypeIdentifierMap TypeIdentifierMap; - // DWARF5 Experimental Options + /// DWARF5 Experimental Options + /// @{ bool HasDwarfAccelTables; bool HasSplitDwarf; - // Separated Dwarf Variables - // In general these will all be for bits that are left in the - // original object file, rather than things that are meant - // to be in the .dwo sections. + /// Separated Dwarf Variables + /// In general these will all be for bits that are left in the + /// original object file, rather than things that are meant + /// to be in the .dwo sections. - // Holder for the skeleton information. + /// Holder for the skeleton information. DwarfFile SkeletonHolder; - /// Store file names for type units under fission in a line table header that - /// will be emitted into debug_line.dwo. - // FIXME: replace this with a map from comp_dir to table so that we can emit - // multiple tables during LTO each of which uses directory 0, referencing the - // comp_dir of all the type units that use it. + /// Store file names for type units under fission in a line table + /// header that will be emitted into debug_line.dwo. + // FIXME: replace this with a map from comp_dir to table so that we + // can emit multiple tables during LTO each of which uses directory + // 0, referencing the comp_dir of all the type units that use it. MCDwarfDwoLineTable SplitTypeUnitFileTable; - - // True iff there are multiple CUs in this module. + /// @} + + /// True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; bool IsPS4; @@ -354,7 +357,7 @@ class DwarfDebug : public AsmPrinterHandler { typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; - /// \brief Find abstract variable associated with Var. + /// Find abstract variable associated with Var. DbgVariable *getExistingAbstractVariable(InlinedVariable IV, const DILocalVariable *&Cleansed); DbgVariable *getExistingAbstractVariable(InlinedVariable IV); @@ -366,56 +369,56 @@ class DwarfDebug : public AsmPrinterHandler { DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV); - /// \brief Construct a DIE for this abstract scope. + /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Compute the size and offset of a DIE given an incoming Offset. + /// Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); - /// \brief Compute the size and offset of all the DIEs. 
+ /// Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// \brief Collect info for variables that were optimized out. + /// Collect info for variables that were optimized out. void collectDeadVariables(); void finishVariableDefinitions(); void finishSubprogramDefinitions(); - /// \brief Finish off debug information after all functions have been + /// Finish off debug information after all functions have been /// processed. void finalizeModuleInfo(); - /// \brief Emit the debug info section. + /// Emit the debug info section. void emitDebugInfo(); - /// \brief Emit the abbreviation section. + /// Emit the abbreviation section. void emitAbbreviations(); - /// \brief Emit a specified accelerator table. + /// Emit a specified accelerator table. void emitAccel(DwarfAccelTable &Accel, MCSection *Section, StringRef TableName); - /// \brief Emit visible names into a hashed accelerator table section. + /// Emit visible names into a hashed accelerator table section. void emitAccelNames(); - /// \brief Emit objective C classes and categories into a hashed + /// Emit objective C classes and categories into a hashed /// accelerator table section. void emitAccelObjC(); - /// \brief Emit namespace dies into a hashed accelerator table. + /// Emit namespace dies into a hashed accelerator table. void emitAccelNamespaces(); - /// \brief Emit type dies into a hashed accelerator table. + /// Emit type dies into a hashed accelerator table. void emitAccelTypes(); - /// \brief Emit visible names into a debug pubnames section. + /// Emit visible names into a debug pubnames section. /// \param GnuStyle determines whether or not we want to emit /// additional information into the table ala newer gcc for gdb /// index. void emitDebugPubNames(bool GnuStyle = false); - /// \brief Emit visible types into a debug pubtypes section. + /// Emit visible types into a debug pubtypes section. /// \param GnuStyle determines whether or not we want to emit /// additional information into the table ala newer gcc for gdb /// index. @@ -425,91 +428,91 @@ class DwarfDebug : public AsmPrinterHandler { bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); - /// \brief Emit visible names into a debug str section. + /// Emit visible names into a debug str section. void emitDebugStr(); - /// \brief Emit visible names into a debug loc section. + /// Emit visible names into a debug loc section. void emitDebugLoc(); - /// \brief Emit visible names into a debug loc dwo section. + /// Emit visible names into a debug loc dwo section. void emitDebugLocDWO(); - /// \brief Emit visible names into a debug aranges section. + /// Emit visible names into a debug aranges section. void emitDebugARanges(); - /// \brief Emit visible names into a debug ranges section. + /// Emit visible names into a debug ranges section. void emitDebugRanges(); - /// \brief Emit inline info using custom format. + /// Emit inline info using custom format. void emitDebugInlineInfo(); /// DWARF 5 Experimental Split Dwarf Emitters - /// \brief Initialize common features of skeleton units. + /// Initialize common features of skeleton units. void initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU); - /// \brief Construct the split debug info compile unit for the debug info + /// Construct the split debug info compile unit for the debug info /// section. 
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); - /// \brief Construct the split debug info compile unit for the debug info + /// Construct the split debug info compile unit for the debug info /// section. DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU); - /// \brief Emit the debug info dwo section. + /// Emit the debug info dwo section. void emitDebugInfoDWO(); - /// \brief Emit the debug abbrev dwo section. + /// Emit the debug abbrev dwo section. void emitDebugAbbrevDWO(); - /// \brief Emit the debug line dwo section. + /// Emit the debug line dwo section. void emitDebugLineDWO(); - /// \brief Emit the debug str dwo section. + /// Emit the debug str dwo section. void emitDebugStrDWO(); /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; - /// \brief Create new DwarfCompileUnit for the given metadata node with tag + /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); - /// \brief Construct imported_module or imported_declaration DIE. + /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, const DIImportedEntity *N); - /// \brief Register a source line with debug info. Returns the unique + /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// \brief Indentify instructions that are marking the beginning of or + /// Indentify instructions that are marking the beginning of or /// ending of a scope. void identifyScopeMarkers(); - /// \brief Populate LexicalScope entries with variables' info. + /// Populate LexicalScope entries with variables' info. void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet<InlinedVariable> &ProcessedVars); - /// \brief Build the location list for all DBG_VALUEs in the + /// Build the location list for all DBG_VALUEs in the /// function that describe the same variable. void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges); - /// \brief Collect variable information from the side table maintained + /// Collect variable information from the side table maintained /// by MMI. void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); - /// \brief Ensure that a label will be emitted before MI. + /// Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); } - /// \brief Ensure that a label will be emitted after MI. + /// Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); } @@ -522,50 +525,50 @@ public: ~DwarfDebug() override; - /// \brief Emit all Dwarf sections that should come prior to the + /// Emit all Dwarf sections that should come prior to the /// content. void beginModule(); - /// \brief Emit all Dwarf sections that should come after the content. + /// Emit all Dwarf sections that should come after the content. void endModule() override; - /// \brief Gather pre-function debug information. + /// Gather pre-function debug information. 
void beginFunction(const MachineFunction *MF) override; - /// \brief Gather and emit post-function debug information. + /// Gather and emit post-function debug information. void endFunction(const MachineFunction *MF) override; - /// \brief Process beginning of an instruction. + /// Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; - /// \brief Process end of an instruction. + /// Process end of an instruction. void endInstruction() override; - /// \brief Add a DIE to the set of types that we're going to pull into + /// Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - /// \brief Add a label so that arange data can be generated for it. + /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } - /// \brief For symbols that have a size designated (e.g. common symbols), + /// For symbols that have a size designated (e.g. common symbols), /// this tracks that size. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { SymSize[Sym] = Size; } - /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the /// standard DW_OP_form_tls_address opcode bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } // Experimental DWARF5 features. - /// \brief Returns whether or not to emit tables that dwarf consumers can + /// Returns whether or not to emit tables that dwarf consumers can /// use to accelerate lookup. bool useDwarfAccelTables() const { return HasDwarfAccelTables; } - /// \brief Returns whether or not to change the current debug info for the + /// Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() const { return HasSplitDwarf; } @@ -579,7 +582,7 @@ public: /// Returns the entries for the .debug_loc section. const DebugLocStream &getDebugLocs() const { return DebugLocs; } - /// \brief Emit an entry for the debug loc section. This can be used to + /// Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocStream::Entry &Entry); @@ -592,7 +595,7 @@ public: return Ref.resolve(TypeIdentifierMap); } - /// \brief Return the TypeIdentifierMap. + /// Return the TypeIdentifierMap. const DITypeIdentifierMap &getTypeIdentifierMap() const { return TypeIdentifierMap; } @@ -627,14 +630,14 @@ public: less_first())); } - /// \brief A helper function to check whether the DIE for a given Scope is + /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); - /// \brief Return Label preceding the instruction. + /// Return Label preceding the instruction. MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - /// \brief Return Label immediately following the instruction. + /// Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4000ae48a856..44d9d2245dda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -113,7 +113,7 @@ protected: DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - /// \brief Add a string attribute data and value. + /// Add a string attribute data and value. /// /// This is guaranteed to be in the local string pool instead of indirected. void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); @@ -142,10 +142,10 @@ public: unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } - /// \brief Return true if this compile unit has something to write out. + /// Return true if this compile unit has something to write out. bool hasContent() const { return UnitDie.hasChildren(); } - /// \brief Get string containing language specific context for a global name. + /// Get string containing language specific context for a global name. /// /// Walks the metadata parent chain in a language specific manner (using the /// compile unit language) and returns it as a string. This is done at the @@ -162,42 +162,42 @@ public: virtual void addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) {} - /// \brief Add a new name to the namespace accelerator table. + /// Add a new name to the namespace accelerator table. void addAccelNamespace(StringRef Name, const DIE &Die); - /// \brief Returns the DIE map slot for the specified debug variable. + /// Returns the DIE map slot for the specified debug variable. /// /// We delegate the request to DwarfDebug when the MDNode can be part of the /// type system, since DIEs for the type system can be shared across CUs and /// the mappings are kept in DwarfDebug. DIE *getDIE(const DINode *D) const; - /// \brief Returns a fresh newly allocated DIELoc. + /// Returns a fresh newly allocated DIELoc. DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; } - /// \brief Insert DIE into the map. + /// Insert DIE into the map. /// /// We delegate the request to DwarfDebug when the MDNode can be part of the /// type system, since DIEs for the type system can be shared across CUs and /// the mappings are kept in DwarfDebug. void insertDIE(const DINode *Desc, DIE *D); - /// \brief Add a flag that is true to the DIE. + /// Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); - /// \brief Add an unsigned integer attribute data and value. + /// Add an unsigned integer attribute data and value. void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer); void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); - /// \brief Add an signed integer attribute data and value. + /// Add an signed integer attribute data and value. void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer); void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); - /// \brief Add a string attribute data and value. + /// Add a string attribute data and value. /// /// We always emit a reference to the string pool instead of immediate /// strings so that DIEs have more predictable sizes. In the case of split @@ -205,38 +205,38 @@ public: /// into the string table. 
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - /// \brief Add a Dwarf label attribute data and value. + /// Add a Dwarf label attribute data and value. DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); - /// \brief Add an offset into a section attribute data and value. + /// Add an offset into a section attribute data and value. void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); - /// \brief Add a dwarf op address data and value using the form given and an + /// Add a dwarf op address data and value using the form given and an /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. void addOpAddress(DIELoc &Die, const MCSymbol *Label); - /// \brief Add a label delta attribute data and value. + /// Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); - /// \brief Add a DIE attribute data and value. + /// Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); - /// \brief Add a DIE attribute data and value. + /// Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); - /// \brief Add block data. + /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); - /// \brief Add block data. + /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); - /// \brief Add location information to specified debug information entry. + /// Add location information to specified debug information entry. void addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory); void addSourceLine(DIE &Die, const DILocalVariable *V); @@ -246,30 +246,30 @@ public: void addSourceLine(DIE &Die, const DINamespace *NS); void addSourceLine(DIE &Die, const DIObjCProperty *Ty); - /// \brief Add constant value entry in variable DIE. + /// Add constant value entry in variable DIE. void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); - /// \brief Add constant value entry in variable DIE. + /// Add constant value entry in variable DIE. void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); - /// \brief Add a linkage name, if it isn't empty. + /// Add a linkage name, if it isn't empty. void addLinkageName(DIE &Die, StringRef LinkageName); - /// \brief Add template parameters in buffer. + /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); - /// \brief Add register operand. + /// Add register operand. /// \returns false if the register does not exist, e.g., because it was never /// materialized. bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits = 0, unsigned OffsetInBits = 0); - /// \brief Add register offset. + /// Add register offset. /// \returns false if the register does not exist, e.g., because it was never /// materialized. 
bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); @@ -283,7 +283,7 @@ public: dwarf::Attribute Attribute, const MachineLocation &Location); - /// \brief Add a new type attribute to the specified entity. + /// Add a new type attribute to the specified entity. /// /// This takes and attribute parameter because DW_AT_friend attributes are /// also type references. @@ -297,19 +297,19 @@ public: void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal = false); - /// \brief Find existing DIE or create new DIE for the given type. + /// Find existing DIE or create new DIE for the given type. DIE *getOrCreateTypeDIE(const MDNode *N); - /// \brief Get context owner's DIE. + /// Get context owner's DIE. DIE *createTypeDIE(const DICompositeType *Ty); - /// \brief Get context owner's DIE. + /// Get context owner's DIE. DIE *getOrCreateContextDIE(const DIScope *Context); - /// \brief Construct DIEs for types that contain vtables. + /// Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); - /// \brief Construct function argument DIEs. + /// Construct function argument DIEs. void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and @@ -332,14 +332,14 @@ public: void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: - /// \brief Create new static data member DIE. + /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); /// Look up the source ID with the given directory and source file names. If /// none currently exists, create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; - /// \brief Look in the DwarfDebug map for the MDNode that corresponds to the + /// Look in the DwarfDebug map for the MDNode that corresponds to the /// reference. template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return DD->resolve(Ref); @@ -358,15 +358,15 @@ private: void constructTemplateValueParameterDIE(DIE &Buffer, const DITemplateValueParameter *TVP); - /// \brief Return the default lower bound for an array. + /// Return the default lower bound for an array. /// /// If the DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// \brief Get an anonymous type for index type. + /// Get an anonymous type for index type. DIE *getIndexTyDie(); - /// \brief Set D as anonymous type for index which can be reused later. + /// Set D as anonymous type for index which can be reused later. void setIndexTyDie(DIE *D) { IndexTyDie = D; } /// If this is a named finished type then include it in the list of types for diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 1be3fd74d602..49ef8d3ddc8f 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. 
- if (SawPotentiallyThrowing && !IsSJLJ) { + if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 535b1f605853..6610ac78f8c4 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -97,7 +97,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); Asm->OutStreamer->EmitLabel(MCL); CurFn->Instrs.push_back(MCL); - InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine()); + InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol()); } WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) @@ -264,22 +264,38 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // Identify the function this subsection is for. Asm->OutStreamer->EmitCOFFSecRel32(Fn); Asm->OutStreamer->EmitCOFFSectionIndex(Fn); - // Insert padding after a 16-bit section index. - Asm->EmitInt16(0); + // Insert flags after a 16-bit section index. + Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS); // Length of the function's code, in bytes. EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); // PC-to-linenumber lookup table: MCSymbol *FileSegmentEnd = nullptr; + + // The start of the last segment: + size_t LastSegmentStart = 0; + + auto FinishPreviousChunk = [&] { + if (!FileSegmentEnd) + return; + for (size_t ColSegI = LastSegmentStart, + ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; + ColSegI != ColSegEnd; ++ColSegI) { + unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber; + Asm->EmitInt16(ColumnNumber); // Start column + Asm->EmitInt16(ColumnNumber); // End column + } + Asm->OutStreamer->EmitLabel(FileSegmentEnd); + }; + for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { MCSymbol *Instr = FI.Instrs[J]; assert(InstrInfo.count(Instr)); if (FilenameSegmentLengths.count(J)) { // We came to a beginning of a new filename segment. - if (FileSegmentEnd) - Asm->OutStreamer->EmitLabel(FileSegmentEnd); + FinishPreviousChunk(); StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; assert(FileNameRegistry.Infos.count(CurFilename)); size_t IndexInStringTable = @@ -300,6 +316,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // records. FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); + LastSegmentStart = J; } // The first PC with the given linenumber and the linenumber itself. 
@@ -307,8 +324,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { Asm->EmitInt32(InstrInfo[Instr].LineNumber); } - if (FileSegmentEnd) - Asm->OutStreamer->EmitLabel(FileSegmentEnd); + FinishPreviousChunk(); Asm->OutStreamer->EmitLabel(LineTableEnd); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index a5b399f73707..43d1a432712e 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -52,11 +52,13 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { struct InstrInfoTy { StringRef Filename; unsigned LineNumber; + unsigned ColumnNumber; - InstrInfoTy() : LineNumber(0) {} + InstrInfoTy() : LineNumber(0), ColumnNumber(0) {} - InstrInfoTy(StringRef Filename, unsigned LineNumber) - : Filename(Filename), LineNumber(LineNumber) {} + InstrInfoTy(StringRef Filename, unsigned LineNumber, unsigned ColumnNumber) + : Filename(Filename), LineNumber(LineNumber), + ColumnNumber(ColumnNumber) {} }; DenseMap<MCSymbol *, InstrInfoTy> InstrInfo; diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 79830bc3443b..71c77815e281 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -70,19 +70,27 @@ void WinException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonality(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - shouldEmitPersonality = hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per; + bool forceEmitPersonality = + F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F->needsUnwindTableEntry(); + + shouldEmitPersonality = forceEmitPersonality || (hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - // If we're not using CFI, we don't want the CFI or the personality. Emit the - // LSDA if this is the parent function. + // If we're not using CFI, we don't want the CFI or the personality. If + // WinEHPrepare outlined something, we should emit the LSDA. if (!Asm->MAI->usesWindowsCFI()) { - shouldEmitLSDA = (hasLandingPads && F == ParentF); + bool HasOutlinedChildren = + F->hasFnAttribute("wineh-parent") && F == ParentF; + shouldEmitLSDA = HasOutlinedChildren; shouldEmitPersonality = false; return; } @@ -121,7 +129,10 @@ void WinException::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA) return; - EHPersonality Per = MMI->getPersonalityType(); + const Function *F = MF->getFunction(); + EHPersonality Per = EHPersonality::Unknown; + if (F->hasPersonalityFn()) + Per = classifyEHPersonality(F->getPersonalityFn()); // Get rid of any dead landing pads if we're not using a Windows EH scheme. In // Windows EH schemes, the landing pad is not actually reachable. It only @@ -350,6 +361,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. // EHFlags & 2 -> ??? // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
+ OS.EmitValueToAlignment(4); OS.EmitLabel(FuncInfoXData); OS.EmitIntValue(0x19930522, 4); // MagicNumber OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState @@ -555,7 +567,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX && - "no EH reg node frameescape index"); + "no EH reg node localescape index"); MCSymbol *ParentFrameOffset = Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName); MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol( @@ -578,9 +590,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // Emit the __ehtable label that we use for llvm.x86.seh.lsda. MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); + OS.EmitValueToAlignment(4); OS.EmitLabel(LSDALabel); - const Function *Per = MMI->getPersonality(); + const Function *Per = + dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 82f5c482408a..db00910cd018 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -34,4 +34,5 @@ cl::opt<unsigned> cl::Hidden); BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 70de4e7ebd11..6ab6acc03722 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -147,10 +147,13 @@ class TypePromotionTransaction; /// OptSize - True if optimizing for size. bool OptSize; + /// DataLayout for the Function being processed. + const DataLayout *DL; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) { + : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) { initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -203,6 +206,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + DL = &F.getParent()->getDataLayout(); + bool EverMadeChange = false; // Clear per function information. InsertedInsts.clear(); @@ -753,10 +758,11 @@ static bool SinkCast(CastInst *CI) { /// /// Return true if any changes are made. /// -static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, + const DataLayout &DL) { // If this is a noop copy, - EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(CI->getType()); + EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, CI->getType()); // This is an fp<->int conversion? 
if (SrcVT.isInteger() != DstVT.isInteger()) @@ -921,7 +927,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) { static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, - const TargetLowering &TLI) { + const TargetLowering &TLI, const DataLayout &DL) { BasicBlock *UserBB = User->getParent(); DenseMap<BasicBlock *, CastInst *> InsertedTruncs; TruncInst *TruncI = dyn_cast<TruncInst>(User); @@ -947,7 +953,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, // approximation; some nodes' legality is determined by the // operand or other means. There's no good way to find out though. if (TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(TruncUser->getType(), true))) + ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) continue; // Don't bother for PHI nodes. @@ -1005,13 +1011,14 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, /// instruction. /// Return true if any changes are made. static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, - const TargetLowering &TLI) { + const TargetLowering &TLI, + const DataLayout &DL) { BasicBlock *DefBB = ShiftI->getParent(); /// Only insert instructions in each block once. DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; - bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); bool MadeChange = false; for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); @@ -1048,9 +1055,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, if (isa<TruncInst>(User) && shiftIsLegal // If the type of the truncate is legal, no trucate will be // introduced in other basic blocks. - && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) + && + (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) MadeChange = - SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); continue; } @@ -1307,12 +1315,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - // Align the pointer arguments to this call if the target thinks it's a good // idea unsigned MinSize, PrefAlign; - if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { for (auto &Arg : CI->arg_operands()) { // We want to align both objects whose address is used directly and // objects whose address is used in casts and GEPs, though it only makes @@ -1320,36 +1326,34 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // if size - offset meets the size threshold. 
if (!Arg->getType()->isPointerTy()) continue; - APInt Offset(TD->getPointerSizeInBits( - cast<PointerType>(Arg->getType())->getAddressSpace()), 0); - Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + APInt Offset(DL->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), + 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); uint64_t Offset2 = Offset.getLimitedValue(); if ((Offset2 & (PrefAlign-1)) != 0) continue; AllocaInst *AI; - if ((AI = dyn_cast<AllocaInst>(Val)) && - AI->getAlignment() < PrefAlign && - TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && + DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) AI->setAlignment(PrefAlign); // Global variables can only be aligned if they are defined in this // object (i.e. they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is // forbidden. GlobalVariable *GV; - if ((GV = dyn_cast<GlobalVariable>(Val)) && - GV->hasUniqueInitializer() && - !GV->hasSection() && - GV->getAlignment() < PrefAlign && - TD->getTypeAllocSize( - GV->getType()->getElementType()) >= MinSize + Offset2) + if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() && + !GV->hasSection() && GV->getAlignment() < PrefAlign && + DL->getTypeAllocSize(GV->getType()->getElementType()) >= + MinSize + Offset2) GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { - unsigned Align = getKnownAlignment(MI->getDest(), *TD); + unsigned Align = getKnownAlignment(MI->getDest(), *DL); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) - Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); if (Align > MI->getAlignment()) MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); } @@ -2099,6 +2103,7 @@ class AddressingModeMatcher { SmallVectorImpl<Instruction*> &AddrModeInsts; const TargetMachine &TM; const TargetLowering &TLI; + const DataLayout &DL; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and /// the memory instruction that we're computing this address for. @@ -2131,8 +2136,9 @@ class AddressingModeMatcher { : AddrModeInsts(AMI), TM(TM), TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) ->getTargetLowering()), - AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), - InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT) { + DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), + MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), + PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } public: @@ -2199,7 +2205,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, TestAddrMode.ScaledReg = ScaleReg; // If the new address isn't legal, bail out. - if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) + if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) return false; // It was legal, so commit it. @@ -2216,7 +2222,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, // If this addressing mode is legal, commit it and remember that we folded // this instruction. 
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) { + if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); AddrMode = TestAddrMode; return true; @@ -2262,7 +2268,8 @@ static bool MightBeFoldableInst(Instruction *I) { /// \note \p Val is assumed to be the product of some type promotion. /// Therefore if \p Val has an undefined state in \p TLI, this is assumed /// to be legal, as the non-promoted value would have had the same state. -static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { +static bool isPromotedInstructionLegal(const TargetLowering &TLI, + const DataLayout &DL, Value *Val) { Instruction *PromotedInst = dyn_cast<Instruction>(Val); if (!PromotedInst) return false; @@ -2272,7 +2279,7 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { return true; // Otherwise, check if the promoted instruction is legal or not. return TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(PromotedInst->getType())); + ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); } /// \brief Hepler class to perform type promotion. @@ -2646,7 +2653,7 @@ bool AddressingModeMatcher::IsPromotionProfitable( // The promotion is neutral but it may help folding the sign extension in // loads for instance. // Check that we did not create an illegal instruction. - return isPromotedInstructionLegal(TLI, PromotedOperand); + return isPromotedInstructionLegal(TLI, DL, PromotedOperand); } /// MatchOperationAddr - Given an instruction or constant expr, see if we can @@ -2674,12 +2681,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. return MatchAddr(AddrInst->getOperand(0), Depth); - case Instruction::IntToPtr: + case Instruction::IntToPtr: { + auto AS = AddrInst->getType()->getPointerAddressSpace(); + auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); // This inttoptr is a no-op if the integer type is pointer sized. - if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == - TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) + if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) return MatchAddr(AddrInst->getOperand(0), Depth); return false; + } case Instruction::BitCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). @@ -2752,16 +2761,15 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned VariableScale = 0; int64_t ConstantOffset = 0; - const DataLayout *TD = TLI.getDataLayout(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = TD->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); ConstantOffset += SL->getElementOffset(Idx); } else { - uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); + uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { ConstantOffset += CI->getSExtValue()*TypeSize; } else if (TypeSize) { // Scales of zero don't do anything. 
@@ -2781,7 +2789,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (VariableOperand == -1) { AddrMode.BaseOffs += ConstantOffset; if (ConstantOffset == 0 || - TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) { + TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. if (MatchAddr(AddrInst->getOperand(0), Depth+1)) return true; @@ -2904,14 +2912,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { // Fold in immediates if legal for the target. AddrMode.BaseOffs += CI->getSExtValue(); - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseOffs -= CI->getSExtValue(); } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { // If this is a global variable, try to fold it into the addressing mode. if (!AddrMode.BaseGV) { AddrMode.BaseGV = GV; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseGV = nullptr; } @@ -2955,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { AddrMode.HasBaseReg = true; AddrMode.BaseReg = Addr; // Still check for legality in case the target supports [imm] but not [i+r]. - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.HasBaseReg = false; AddrMode.BaseReg = nullptr; @@ -2965,7 +2973,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (AddrMode.Scale == 0) { AddrMode.Scale = 1; AddrMode.ScaledReg = Addr; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.Scale = 0; AddrMode.ScaledReg = nullptr; @@ -2984,7 +2992,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI->ParseConstraints(TRI, ImmutableCallSite(CI)); + TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI, + ImmutableCallSite(CI)); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3324,7 +3333,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; // First, find the pointer. @@ -3443,7 +3452,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *Result = nullptr; // Start with the base register. 
Do this first so that subsequent address @@ -3545,8 +3554,8 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { const TargetRegisterInfo *TRI = TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); - TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(TRI, CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(*DL, TRI, CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3680,7 +3689,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && (TotalCreatedInstsCost > 1 || - !isPromotedInstructionLegal(*TLI, PromotedVal))) { + !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { // The promotion is not profitable, rollback to the previous state. TPT.rollback(LastKnownGood); continue; @@ -3735,8 +3744,8 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { if (!HasPromoted && LI->getParent() == I->getParent()) return false; - EVT VT = TLI->getValueType(I->getType()); - EVT LoadVT = TLI->getValueType(LI->getType()); + EVT VT = TLI->getValueType(*DL, I->getType()); + EVT LoadVT = TLI->getValueType(*DL, LI->getType()); // If the load has other users and the truncate is not free, this probably // isn't worthwhile. @@ -4013,6 +4022,9 @@ namespace { /// Assuming both extractelement and store can be combine, we get rid of the /// transition. class VectorPromoteHelper { + /// DataLayout associated with the current module. + const DataLayout &DL; + /// Used to perform some checks on the legality of vector operations. const TargetLowering &TLI; @@ -4086,7 +4098,8 @@ class VectorPromoteHelper { unsigned Align = ST->getAlignment(); // Check if this store is supported. if (!TLI.allowsMisalignedMemoryAccesses( - TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) { + TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, + Align)) { // If this is not supported, there is no way we can combine // the extract with the store. return false; @@ -4181,9 +4194,10 @@ class VectorPromoteHelper { } public: - VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI, - Instruction *Transition, unsigned CombineCost) - : TLI(TLI), TTI(TTI), Transition(Transition), + VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, + const TargetTransformInfo &TTI, Instruction *Transition, + unsigned CombineCost) + : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { assert(Transition && "Do not know how to promote null"); } @@ -4219,7 +4233,7 @@ public: return false; return StressStoreExtract || TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(getTransitionType(), true)); + ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); } /// \brief Check whether or not \p Use can be combined @@ -4323,7 +4337,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { // we do not do that for now. BasicBlock *Parent = Inst->getParent(); DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); - VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost); + VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); // If the transition has more than one use, assume this is not going to be // beneficial. 
while (Inst->hasOneUse()) { @@ -4368,8 +4382,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { + if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4388,15 +4401,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (isa<Constant>(CI->getOperand(0))) return false; - if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) + if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) return true; if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { /// Sink a zext or sext into its user blocks if the target type doesn't /// fit in one register - if (TLI && TLI->getTypeAction(CI->getContext(), - TLI->getValueType(CI->getType())) == - TargetLowering::TypeExpandInteger) { + if (TLI && + TLI->getTypeAction(CI->getContext(), + TLI->getValueType(*DL, CI->getType())) == + TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { bool MadeChange = MoveExtToFormExtLoad(I); @@ -4433,7 +4447,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { BinOp->getOpcode() == Instruction::LShr)) { ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); if (TLI && CI && TLI->hasExtractBitsInsn()) - return OptimizeExtractBits(BinOp, CI, *TLI); + return OptimizeExtractBits(BinOp, CI, *TLI, *DL); return false; } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 963d573ea7f0..941129b5cc95 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -60,7 +60,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { return false; // Don't delete frame allocation labels. - if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) + if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE) return false; // Don't delete instructions with side effects. diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 5b09cf1a0fd7..201f9c150083 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -733,12 +733,14 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; + const MachineRegisterInfo &MRI = mf.getRegInfo(); for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { - anyregs = true; - break; - } + I != E && !anyregs; ++I) + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) + if (!MRI.reg_nodbg_empty(*AI)) { + anyregs = true; + break; + } if (!anyregs) return false; // Initialize the AliasMap on the first use. 
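The ExecutionDepsFix hunk above drops a use of MachineRegisterInfo::isPhysRegUsed: instead of asking whether a physical register was marked used, the pass now walks each register of the relevant class plus its aliases and checks whether any non-debug def or use exists. The following is only a self-contained sketch of that query, using the same MachineRegisterInfo/TargetRegisterInfo interfaces that appear in the hunk; the helper name is illustrative and not part of the patch:

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Return true if any register of class RC (or a register aliasing one of
    // them) has a non-debug def or use in the current function.
    static bool anyRegOfClassReferenced(const TargetRegisterClass *RC,
                                        const MachineRegisterInfo &MRI,
                                        const TargetRegisterInfo *TRI) {
      for (unsigned Reg : *RC)
        for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid(); ++AI)
          if (!MRI.reg_nodbg_empty(*AI))
            return true;
      return false;
    }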
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 37b3bf17ed1f..6f9e8394081e 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -117,7 +117,6 @@ STATISTIC(NumMerged, "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { const TargetMachine *TM; - const DataLayout *DL; // FIXME: Infer the maximum possible offset depending on the actual users // (these max offsets are different for the users inside Thumb or ARM // functions), see the code that passes in the offset in the ARM backend @@ -160,8 +159,8 @@ namespace { explicit GlobalMerge(const TargetMachine *TM = nullptr, unsigned MaximalOffset = 0, bool OnlyOptimizeForSize = false) - : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), - MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) { + : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), + OnlyOptimizeForSize(OnlyOptimizeForSize) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -188,14 +187,16 @@ INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { + auto &DL = M.getDataLayout(); // FIXME: Find better heuristics - std::stable_sort(Globals.begin(), Globals.end(), - [this](const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); - Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + std::stable_sort( + Globals.begin(), Globals.end(), + [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); - return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2)); - }); + return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2)); + }); // If we want to just blindly group all globals together, do so. if (!GlobalMergeGroupByUse) { @@ -410,6 +411,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, unsigned AddrSpace) const { Type *Int32Ty = Type::getInt32Ty(M.getContext()); + auto &DL = M.getDataLayout(); assert(Globals.size() > 1); @@ -427,7 +429,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, GlobalVariable *TheFirstExternal = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getType()->getElementType(); - MergedSize += DL->getTypeAllocSize(Ty); + MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } @@ -526,6 +528,7 @@ bool GlobalMerge::doInitialization(Module &M) { if (!EnableGlobalMerge) return false; + auto &DL = M.getDataLayout(); DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, BSSGlobals; bool Changed = false; @@ -548,9 +551,9 @@ bool GlobalMerge::doInitialization(Module &M) { unsigned AddressSpace = PT->getAddressSpace(); // Ignore fancy-aligned globals for now. - unsigned Alignment = DL->getPreferredAlignment(I); + unsigned Alignment = DL.getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); - if (Alignment > DL->getABITypeAlignment(Ty)) + if (Alignment > DL.getABITypeAlignment(Ty)) continue; // Ignore all 'special' globals. 
@@ -562,7 +565,7 @@ bool GlobalMerge::doInitialization(Module &M) { if (isMustKeepGlobalVariable(I)) continue; - if (DL->getTypeAllocSize(Ty) < MaxOffset) { + if (DL.getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) BSSGlobals[AddressSpace].push_back(I); else if (I->isConstant()) diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index a02cd67ac649..93e04876a8f3 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -25,9 +25,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -47,6 +50,11 @@ static cl::opt<unsigned> PageSize("imp-null-check-page-size", "bytes"), cl::init(4096)); +#define DEBUG_TYPE "implicit-null-checks" + +STATISTIC(NumImplicitNullChecks, + "Number of explicit null checks made implicit"); + namespace { class ImplicitNullChecks : public MachineFunctionPass { @@ -171,6 +179,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // callq throw_NullPointerException // // LblNotNull: + // Inst0 + // Inst1 + // ... // Def = Load (%RAX + <offset>) // ... // @@ -181,6 +192,8 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: + // Inst0 + // Inst1 // ... // // LblNull: @@ -188,15 +201,75 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // unsigned PointerReg = MBP.LHS.getReg(); - MachineInstr *MemOp = &*NotNullSucc->begin(); - unsigned BaseReg, Offset; - if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI)) - if (MemOp->mayLoad() && !MemOp->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MemOp->getDesc().getNumDefs() == 1) { - NullCheckList.emplace_back(MemOp, MBP.ConditionDef, &MBB, NotNullSucc, - NullSucc); - return true; + + // As we scan NotNullSucc for a suitable load instruction, we keep track of + // the registers defined and used by the instructions we scan past. This bit + // of information lets us decide if it is legal to hoist the load instruction + // we find (if we do find such an instruction) to before NotNullSucc. + DenseSet<unsigned> RegDefs, RegUses; + + // Returns true if it is safe to reorder MI to before NotNullSucc. + auto IsSafeToHoist = [&](MachineInstr *MI) { + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. 
+ for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } + } + + return true; + }; + + for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; + ++MII) { + MachineInstr *MI = &*MII; + unsigned BaseReg, Offset; + if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && + Offset < PageSize && MI->getDesc().getNumDefs() == 1 && + IsSafeToHoist(MI)) { + NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc); + return true; + } + + // MI did not match our criteria for conversion to a trapping load. Check + // if we can continue looking. + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + return false; + + for (auto *MMO : MI->memoperands()) + // Right now we don't want to worry about LLVM's memory model. + if (!MMO->isUnordered()) + return false; + + // It _may_ be okay to reorder a later load instruction across MI. Make a + // note of its operands so that we can make the legality check if we find a + // suitable load instruction: + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); } + } return false; } @@ -247,7 +320,7 @@ void ImplicitNullChecks::rewriteNullChecks( // touch the successors list for any basic block since we haven't changed // control flow, we've just made it implicit. insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel); - NC.MemOperation->removeFromParent(); + NC.MemOperation->eraseFromParent(); NC.CheckOperation->eraseFromParent(); // Insert an *unconditional* branch to not-null successor. @@ -257,6 +330,8 @@ void ImplicitNullChecks::rewriteNullChecks( // Emit the HandlerLabel as an EH_LABEL. BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL, TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel); + + NumImplicitNullChecks++; } } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index b486bdc91453..37299eb664cf 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -90,8 +90,8 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, - bool DisableVerify, AnalysisID StartAfter, - AnalysisID StopAfter, + bool DisableVerify, AnalysisID StartBefore, + AnalysisID StartAfter, AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer = nullptr) { // Add internal analysis passes from the target machine. @@ -100,7 +100,7 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); - PassConfig->setStartStopPasses(StartAfter, StopAfter); + PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter); // Set PassConfig options provided by TargetMachine. 
PassConfig->setDisableVerify(DisableVerify); @@ -143,11 +143,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitFile( PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode( - this, PM, DisableVerify, StartAfter, StopAfter, MFInitializer); + MCContext *Context = + addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, + StopAfter, MFInitializer); if (!Context) return true; @@ -231,7 +232,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr, + nullptr); if (!Ctx) return true; diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 154ce6fc122b..000151acd735 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -15,12 +15,12 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -49,7 +49,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); VRM = &getAnalysis<VirtRegMap>(); @@ -101,7 +100,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -131,6 +129,14 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { DEBUG(dbgs() << '\n'); } +bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + if (!Matrix[*Unit].empty()) + return true; + } + return false; +} + bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg) { // Check if the cached information is valid. 
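The LLVMTargetMachine changes above thread a new StartBefore pass ID through addPassesToGenerateCode and addPassesToEmitFile alongside the existing StartAfter/StopAfter IDs, so a codegen pipeline can be started at a given pass rather than only after it. The gating itself lives in TargetPassConfig and is not shown in this diff; below is only a hypothetical sketch of how such a StartBefore/StartAfter/StopAfter triple can be evaluated, with all names illustrative:

    using AnalysisID = const void *;        // pass identity, as in llvm::AnalysisID

    struct StartStopGate {
      AnalysisID StartBefore, StartAfter, StopAfter;
      bool Started, Stopped;

      StartStopGate(AnalysisID SB, AnalysisID SA, AnalysisID Stop)
          : StartBefore(SB), StartAfter(SA), StopAfter(Stop),
            Started(!SB && !SA), Stopped(false) {}

      // Decide whether the pass identified by ID should be added.
      bool shouldAddPass(AnalysisID ID) {
        if (StartBefore && ID == StartBefore)
          Started = true;                   // start *with* this pass
        bool Add = Started && !Stopped;
        if (StartAfter && ID == StartAfter)
          Started = true;                   // start with the *next* pass
        if (StopAfter && ID == StopAfter)
          Stopped = true;                   // this pass still runs, later ones do not
        return Add;
      }
    };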
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index e9b3916a11fa..482c33ae2235 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include <cctype> @@ -64,6 +65,17 @@ static bool isIdentifierChar(char C) { return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; } +static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("_", MIToken::underscore) + .Case("implicit", MIToken::kw_implicit) + .Case("implicit-def", MIToken::kw_implicit_define) + .Case("dead", MIToken::kw_dead) + .Case("killed", MIToken::kw_killed) + .Case("undef", MIToken::kw_undef) + .Default(MIToken::Identifier); +} + static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { if (!isalpha(C.peek()) && C.peek() != '_') return None; @@ -71,8 +83,7 @@ static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { while (isIdentifierChar(C.peek())) C.advance(); auto Identifier = Range.upto(C); - Token = MIToken(Identifier == "_" ? MIToken::underscore : MIToken::Identifier, - Identifier); + Token = MIToken(getIdentifierKind(Identifier), Identifier); return C; } @@ -104,9 +115,22 @@ static Cursor maybeLexMachineBasicBlock( return C; } +static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { + auto Range = C; + C.advance(); // Skip '%' + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token = MIToken(MIToken::VirtualRegister, Range.upto(C), + APSInt(NumberRange.upto(C))); + return C; +} + static Cursor maybeLexRegister(Cursor C, MIToken &Token) { if (C.peek() != '%') return None; + if (isdigit(C.peek(1))) + return lexVirtualRegister(C, Token); auto Range = C; C.advance(); // Skip '%' while (isIdentifierChar(C.peek())) @@ -155,6 +179,8 @@ static MIToken::TokenKind symbolToken(char C) { return MIToken::comma; case '=': return MIToken::equal; + case ':': + return MIToken::colon; default: return MIToken::Error; } diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index c28935f38909..55460b56e7d6 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -35,6 +35,14 @@ struct MIToken { comma, equal, underscore, + colon, + + // Keywords + kw_implicit, + kw_implicit_define, + kw_dead, + kw_killed, + kw_undef, // Identifier tokens Identifier, @@ -44,7 +52,8 @@ struct MIToken { GlobalValue, // Other tokens - IntegerLiteral + IntegerLiteral, + VirtualRegister }; private: @@ -66,7 +75,13 @@ public: bool isError() const { return Kind == Error; } bool isRegister() const { - return Kind == NamedRegister || Kind == underscore; + return Kind == NamedRegister || Kind == underscore || + Kind == VirtualRegister; + } + + bool isRegisterFlag() const { + return Kind == kw_implicit || Kind == kw_implicit_define || + Kind == kw_dead || Kind == kw_killed || Kind == kw_undef; } bool is(TokenKind K) const { return Kind == K; } @@ -81,7 +96,7 @@ public: bool hasIntegerValue() const { return Kind == IntegerLiteral || Kind == MachineBasicBlock || - Kind == GlobalValue; + Kind == GlobalValue || Kind == VirtualRegister; } }; diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index b618e53b8e43..c00011288a60 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -18,6 +18,7 @@ 
#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" @@ -28,14 +29,25 @@ using namespace llvm; namespace { +/// A wrapper struct around the 'MachineOperand' struct that includes a source +/// range. +struct MachineOperandWithLocation { + MachineOperand Operand; + StringRef::iterator Begin; + StringRef::iterator End; + + MachineOperandWithLocation(const MachineOperand &Operand, + StringRef::iterator Begin, StringRef::iterator End) + : Operand(Operand), Begin(Begin), End(End) {} +}; + class MIParser { SourceMgr &SM; MachineFunction &MF; SMDiagnostic &Error; StringRef Source, CurrentSource; MIToken Token; - /// Maps from basic block numbers to MBBs. - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots; + const PerFunctionMIParsingState &PFS; /// Maps from indices to unnamed global values and metadata nodes. const SlotMapping &IRSlots; /// Maps from instruction names to op codes. @@ -44,11 +56,12 @@ class MIParser { StringMap<unsigned> Names2Regs; /// Maps from register mask names to register masks. StringMap<const uint32_t *> Names2RegMasks; + /// Maps from subregister names to subregister indices. + StringMap<unsigned> Names2SubRegIndices; public: MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, - StringRef Source, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots); void lex(); @@ -65,8 +78,11 @@ public: bool parse(MachineInstr *&MI); bool parseMBB(MachineBasicBlock *&MBB); + bool parseNamedRegister(unsigned &Reg); bool parseRegister(unsigned &Reg); + bool parseRegisterFlag(unsigned &Flags); + bool parseSubRegisterIndex(unsigned &SubReg); bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); @@ -88,6 +104,9 @@ private: bool parseInstruction(unsigned &OpCode); + bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands, + const MCInstrDesc &MCID); + void initNames2Regs(); /// Try to convert a register name to a register number. Return true if the @@ -100,17 +119,22 @@ private: /// /// Return null if the identifier isn't a register mask. const uint32_t *getRegMask(StringRef Identifier); + + void initNames2SubRegIndices(); + + /// Check if the given identifier is a name of a subregister index. + /// + /// Return 0 if the name isn't a subregister index class. + unsigned getSubRegIndex(StringRef Name); }; } // end anonymous namespace MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, - StringRef Source, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots) : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), - Token(MIToken::Error, StringRef()), MBBSlots(MBBSlots), IRSlots(IRSlots) { -} + Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} void MIParser::lex() { CurrentSource = lexMIToken( @@ -121,8 +145,6 @@ void MIParser::lex() { bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { - // TODO: Get the proper location in the MIR file, not just a location inside - // the string. 
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); Error = SMDiagnostic( SM, SMLoc(), @@ -137,11 +159,12 @@ bool MIParser::parse(MachineInstr *&MI) { // Parse any register operands before '=' // TODO: Allow parsing of multiple operands before '=' MachineOperand MO = MachineOperand::CreateImm(0); - SmallVector<MachineOperand, 8> Operands; - if (Token.isRegister()) { + SmallVector<MachineOperandWithLocation, 8> Operands; + if (Token.isRegister() || Token.isRegisterFlag()) { + auto Loc = Token.location(); if (parseRegisterOperand(MO, /*IsDef=*/true)) return true; - Operands.push_back(MO); + Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); if (Token.isNot(MIToken::equal)) return error("expected '='"); lex(); @@ -155,9 +178,10 @@ bool MIParser::parse(MachineInstr *&MI) { // Parse the remaining machine operands. while (Token.isNot(MIToken::Eof)) { + auto Loc = Token.location(); if (parseMachineOperand(MO)) return true; - Operands.push_back(MO); + Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); if (Token.is(MIToken::Eof)) break; if (Token.isNot(MIToken::comma)) @@ -166,25 +190,16 @@ bool MIParser::parse(MachineInstr *&MI) { } const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); - - // Verify machine operands. if (!MCID.isVariadic()) { - for (size_t I = 0, E = Operands.size(); I < E; ++I) { - if (I < MCID.getNumOperands()) - continue; - // Mark this register as implicit to prevent an assertion when it's added - // to an instruction. This is a temporary workaround until the implicit - // register flag can be parsed. - if (Operands[I].isReg()) - Operands[I].setImplicit(); - } + // FIXME: Move the implicit operand verification to the machine verifier. + if (verifyImplicitOperands(Operands, MCID)) + return true; } - // TODO: Determine the implicit behaviour when implicit register flags are - // parsed. + // TODO: Check for extraneous machine operands. MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); for (const auto &Operand : Operands) - MI->addOperand(MF, Operand); + MI->addOperand(MF, Operand.Operand); return false; } @@ -201,6 +216,80 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) { return false; } +bool MIParser::parseNamedRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + if (parseRegister(Reg)) + return 0; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +static const char *printImplicitRegisterFlag(const MachineOperand &MO) { + assert(MO.isImplicit()); + return MO.isDef() ? "implicit-def" : "implicit"; +} + +static std::string getRegisterName(const TargetRegisterInfo *TRI, + unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); + return StringRef(TRI->getName(Reg)).lower(); +} + +bool MIParser::verifyImplicitOperands( + ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) { + if (MCID.isCall()) + // We can't verify call instructions as they can contain arbitrary implicit + // register and register mask operands. + return false; + + // Gather all the expected implicit operands. 
+ SmallVector<MachineOperand, 4> ImplicitOperands; + if (MCID.ImplicitDefs) + for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpDefs, true, true)); + if (MCID.ImplicitUses) + for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpUses, false, true)); + + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + size_t I = ImplicitOperands.size(), J = Operands.size(); + while (I) { + --I; + if (J) { + --J; + const auto &ImplicitOperand = ImplicitOperands[I]; + const auto &Operand = Operands[J].Operand; + if (ImplicitOperand.isIdenticalTo(Operand)) + continue; + if (Operand.isReg() && Operand.isImplicit()) { + return error(Operands[J].Begin, + Twine("expected an implicit register operand '") + + printImplicitRegisterFlag(ImplicitOperand) + " %" + + getRegisterName(TRI, ImplicitOperand.getReg()) + "'"); + } + } + // TODO: Fix source location when Operands[J].end is right before '=', i.e: + // insead of reporting an error at this location: + // %eax = MOV32r0 + // ^ + // report the error at the following location: + // %eax = MOV32r0 + // ^ + return error(J < Operands.size() ? Operands[J].End : Token.location(), + Twine("missing implicit register operand '") + + printImplicitRegisterFlag(ImplicitOperands[I]) + " %" + + getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'"); + } + return false; +} + bool MIParser::parseInstruction(unsigned &OpCode) { if (Token.isNot(MIToken::Identifier)) return error("expected a machine instruction"); @@ -222,6 +311,17 @@ bool MIParser::parseRegister(unsigned &Reg) { return error(Twine("unknown register name '") + Name + "'"); break; } + case MIToken::VirtualRegister: { + unsigned ID; + if (getUnsigned(ID)) + return true; + const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); + if (RegInfo == PFS.VirtualRegisterSlots.end()) + return error(Twine("use of undefined virtual register '%") + Twine(ID) + + "'"); + Reg = RegInfo->second; + break; + } // TODO: Parse other register kinds. default: llvm_unreachable("The current token should be a register"); @@ -229,14 +329,66 @@ bool MIParser::parseRegister(unsigned &Reg) { return false; } +bool MIParser::parseRegisterFlag(unsigned &Flags) { + switch (Token.kind()) { + case MIToken::kw_implicit: + Flags |= RegState::Implicit; + break; + case MIToken::kw_implicit_define: + Flags |= RegState::ImplicitDefine; + break; + case MIToken::kw_dead: + Flags |= RegState::Dead; + break; + case MIToken::kw_killed: + Flags |= RegState::Kill; + break; + case MIToken::kw_undef: + Flags |= RegState::Undef; + break; + // TODO: report an error when we specify the same flag more than once. + // TODO: parse the other register flags. + default: + llvm_unreachable("The current token should be a register flag"); + } + lex(); + return false; +} + +bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { + assert(Token.is(MIToken::colon)); + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected a subregister index after ':'"); + auto Name = Token.stringValue(); + SubReg = getSubRegIndex(Name); + if (!SubReg) + return error(Twine("use of unknown subregister index '") + Name + "'"); + lex(); + return false; +} + bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { unsigned Reg; - // TODO: Parse register flags. + unsigned Flags = IsDef ? 
RegState::Define : 0; + while (Token.isRegisterFlag()) { + if (parseRegisterFlag(Flags)) + return true; + } + if (!Token.isRegister()) + return error("expected a register after register flags"); if (parseRegister(Reg)) return true; lex(); - // TODO: Parse subregister. - Dest = MachineOperand::CreateReg(Reg, IsDef); + unsigned SubReg = 0; + if (Token.is(MIToken::colon)) { + if (parseSubRegisterIndex(SubReg)) + return true; + } + Dest = MachineOperand::CreateReg( + Reg, Flags & RegState::Define, Flags & RegState::Implicit, + Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, + /*isEarlyClobber=*/false, SubReg); return false; } @@ -266,8 +418,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { unsigned Number; if (getUnsigned(Number)) return true; - auto MBBInfo = MBBSlots.find(Number); - if (MBBInfo == MBBSlots.end()) + auto MBBInfo = PFS.MBBSlots.find(Number); + if (MBBInfo == PFS.MBBSlots.end()) return error(Twine("use of undefined machine basic block #") + Twine(Number)); MBB = MBBInfo->second; @@ -318,8 +470,14 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { bool MIParser::parseMachineOperand(MachineOperand &Dest) { switch (Token.kind()) { + case MIToken::kw_implicit: + case MIToken::kw_implicit_define: + case MIToken::kw_dead: + case MIToken::kw_killed: + case MIToken::kw_undef: case MIToken::underscore: case MIToken::NamedRegister: + case MIToken::VirtualRegister: return parseRegisterOperand(Dest); case MIToken::IntegerLiteral: return parseImmediateOperand(Dest); @@ -408,16 +566,41 @@ const uint32_t *MIParser::getRegMask(StringRef Identifier) { return RegMaskInfo->getValue(); } -bool llvm::parseMachineInstr( - MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parse(MI); +void MIParser::initNames2SubRegIndices() { + if (!Names2SubRegIndices.empty()) + return; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) + Names2SubRegIndices.insert( + std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I)); +} + +unsigned MIParser::getSubRegIndex(StringRef Name) { + initNames2SubRegIndices(); + auto SubRegInfo = Names2SubRegIndices.find(Name); + if (SubRegInfo == Names2SubRegIndices.end()) + return 0; + return SubRegInfo->getValue(); +} + +bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); +} + +bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB); } -bool llvm::parseMBBReference( - MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parseMBB(MBB); +bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return 
MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg); } diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h index 4d6d4e700217..fca4c4e6f885 100644 --- a/lib/CodeGen/MIRParser/MIParser.h +++ b/lib/CodeGen/MIRParser/MIParser.h @@ -26,16 +26,26 @@ struct SlotMapping; class SMDiagnostic; class SourceMgr; +struct PerFunctionMIParsingState { + DenseMap<unsigned, MachineBasicBlock *> MBBSlots; + DenseMap<unsigned, unsigned> VirtualRegisterSlots; +}; + bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, - StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + StringRef Src, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots, SMDiagnostic &Error); bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots, SMDiagnostic &Error); +bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + } // end namespace llvm #endif diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 397458300782..16b0e1655891 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -21,6 +21,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" @@ -48,6 +49,8 @@ class MIRParserImpl { LLVMContext &Context; StringMap<std::unique_ptr<yaml::MachineFunction>> Functions; SlotMapping IRSlots; + /// Maps from register class names to register classes. + StringMap<const TargetRegisterClass *> Names2RegClasses; public: MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, @@ -60,6 +63,11 @@ public: /// Always returns true. bool error(const Twine &Message); + /// Report an error with the given message at the given location. + /// + /// Always returns true. + bool error(SMLoc Loc, const Twine &Message); + /// Report a given error with the location translated from the location in an /// embedded string literal to a location in the MIR file. /// @@ -90,13 +98,18 @@ public: /// Initialize the machine basic block using it's YAML representation. /// /// Return true if an error occurred. - bool initializeMachineBasicBlock( - MachineFunction &MF, MachineBasicBlock &MBB, - const yaml::MachineBasicBlock &YamlMBB, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, + const yaml::MachineBasicBlock &YamlMBB, + const PerFunctionMIParsingState &PFS); + + bool + initializeRegisterInfo(const MachineFunction &MF, + MachineRegisterInfo &RegInfo, + const yaml::MachineFunction &YamlMF, + DenseMap<unsigned, unsigned> &VirtualRegisterSlots); - bool initializeRegisterInfo(MachineRegisterInfo &RegInfo, - const yaml::MachineFunction &YamlMF); + bool initializeFrameInfo(MachineFrameInfo &MFI, + const yaml::MachineFunction &YamlMF); private: /// Return a MIR diagnostic converted from an MI string diagnostic. @@ -109,6 +122,14 @@ private: /// Create an empty function with the given name. 
void createDummyFunction(StringRef Name, Module &M); + + void initNames2RegClasses(const MachineFunction &MF); + + /// Check if the given identifier is a name of a register class. + /// + /// Return null if the name isn't a register class. + const TargetRegisterClass *getRegClass(const MachineFunction &MF, + StringRef Name); }; } // end namespace llvm @@ -125,6 +146,12 @@ bool MIRParserImpl::error(const Twine &Message) { return true; } +bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message))); + return true; +} + bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) { assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error"); reportDiagnostic(diagFromMIStringDiag(Error, SourceRange)); @@ -233,34 +260,44 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasInlineAsm(YamlMF.HasInlineAsm); - if (initializeRegisterInfo(MF.getRegInfo(), YamlMF)) + PerFunctionMIParsingState PFS; + if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF, + PFS.VirtualRegisterSlots)) + return true; + if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF)) return true; const auto &F = *MF.getFunction(); - DenseMap<unsigned, MachineBasicBlock *> MBBSlots; for (const auto &YamlMBB : YamlMF.BasicBlocks) { const BasicBlock *BB = nullptr; - if (!YamlMBB.Name.empty()) { + const yaml::StringValue &Name = YamlMBB.Name; + if (!Name.Value.empty()) { BB = dyn_cast_or_null<BasicBlock>( - F.getValueSymbolTable().lookup(YamlMBB.Name)); + F.getValueSymbolTable().lookup(Name.Value)); if (!BB) - return error(Twine("basic block '") + YamlMBB.Name + - "' is not defined in the function '" + MF.getName() + "'"); + return error(Name.SourceRange.Start, + Twine("basic block '") + Name.Value + + "' is not defined in the function '" + MF.getName() + + "'"); } auto *MBB = MF.CreateMachineBasicBlock(BB); MF.insert(MF.end(), MBB); - bool WasInserted = MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second; + bool WasInserted = + PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second; if (!WasInserted) return error(Twine("redefinition of machine basic block with id #") + Twine(YamlMBB.ID)); } + if (YamlMF.BasicBlocks.empty()) + return error(Twine("machine function '") + Twine(MF.getName()) + + "' requires at least one machine basic block in its body"); // Initialize the machine basic blocks after creating them all so that the // machine instructions parser can resolve the MBB references. unsigned I = 0; for (const auto &YamlMBB : YamlMF.BasicBlocks) { if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB, - MBBSlots)) + PFS)) return true; } return false; @@ -269,7 +306,7 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { bool MIRParserImpl::initializeMachineBasicBlock( MachineFunction &MF, MachineBasicBlock &MBB, const yaml::MachineBasicBlock &YamlMBB, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + const PerFunctionMIParsingState &PFS) { MBB.setAlignment(YamlMBB.Alignment); if (YamlMBB.AddressTaken) MBB.setHasAddressTaken(); @@ -278,16 +315,24 @@ bool MIRParserImpl::initializeMachineBasicBlock( // Parse the successors. 
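(For context, an assumed sketch of the per-block YAML this code consumes; exact field spellings may differ:

    - id: 0
      liveins: [ '%edi' ]
      successors: [ '%bb.1' ]

The loops below resolve the successor and live-in strings through parseMBBReference and parseNamedRegisterReference.)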
for (const auto &MBBSource : YamlMBB.Successors) { MachineBasicBlock *SuccMBB = nullptr; - if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, MBBSlots, IRSlots, + if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots, Error)) return error(Error, MBBSource.SourceRange); // TODO: Report an error when adding the same successor more than once. MBB.addSuccessor(SuccMBB); } + // Parse the liveins. + for (const auto &LiveInSource : YamlMBB.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS, + IRSlots, Error)) + return error(Error, LiveInSource.SourceRange); + MBB.addLiveIn(Reg); + } // Parse the instructions. for (const auto &MISource : YamlMBB.Instructions) { MachineInstr *MI = nullptr; - if (parseMachineInstr(MI, SM, MF, MISource.Value, MBBSlots, IRSlots, Error)) + if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error)) return error(Error, MISource.SourceRange); MBB.insert(MBB.end(), MI); } @@ -295,7 +340,9 @@ bool MIRParserImpl::initializeMachineBasicBlock( } bool MIRParserImpl::initializeRegisterInfo( - MachineRegisterInfo &RegInfo, const yaml::MachineFunction &YamlMF) { + const MachineFunction &MF, MachineRegisterInfo &RegInfo, + const yaml::MachineFunction &YamlMF, + DenseMap<unsigned, unsigned> &VirtualRegisterSlots) { assert(RegInfo.isSSA()); if (!YamlMF.IsSSA) RegInfo.leaveSSA(); @@ -303,6 +350,67 @@ bool MIRParserImpl::initializeRegisterInfo( if (!YamlMF.TracksRegLiveness) RegInfo.invalidateLiveness(); RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); + + // Parse the virtual register information. + for (const auto &VReg : YamlMF.VirtualRegisters) { + const auto *RC = getRegClass(MF, VReg.Class.Value); + if (!RC) + return error(VReg.Class.SourceRange.Start, + Twine("use of undefined register class '") + + VReg.Class.Value + "'"); + unsigned Reg = RegInfo.createVirtualRegister(RC); + // TODO: Report an error when the same virtual register with the same ID is + // redefined. + VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg)); + } + return false; +} + +bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, + const yaml::MachineFunction &YamlMF) { + const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; + MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); + MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); + MFI.setHasStackMap(YamlMFI.HasStackMap); + MFI.setHasPatchPoint(YamlMFI.HasPatchPoint); + MFI.setStackSize(YamlMFI.StackSize); + MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment); + if (YamlMFI.MaxAlignment) + MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); + MFI.setAdjustsStack(YamlMFI.AdjustsStack); + MFI.setHasCalls(YamlMFI.HasCalls); + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); + MFI.setHasVAStart(YamlMFI.HasVAStart); + MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + + // Initialize the fixed frame objects. + for (const auto &Object : YamlMF.FixedStackObjects) { + int ObjectIdx; + if (Object.Type != yaml::FixedMachineStackObject::SpillSlot) + ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset, + Object.IsImmutable, Object.IsAliased); + else + ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + // TODO: Store the mapping between fixed object IDs and object indices to + // parse fixed stack object references correctly. + } + + // Initialize the ordinary frame objects. 
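(An assumed sketch of the serialized frame-object lists handled above and below; exact field spellings may differ:

    fixedStack:
      - { id: 0, offset: -12, size: 4, alignment: 4 }
    stack:
      - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4 }

Variable-sized objects carry a variable-sized type and no fixed size.)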
+ for (const auto &Object : YamlMF.StackObjects) { + int ObjectIdx; + if (Object.Type == yaml::MachineStackObject::VariableSized) + ObjectIdx = + MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr); + else + ObjectIdx = MFI.CreateStackObject( + Object.Size, Object.Alignment, + Object.Type == yaml::MachineStackObject::SpillSlot); + MFI.setObjectOffset(ObjectIdx, Object.Offset); + // TODO: Store the mapping between object IDs and object indices to parse + // stack object references correctly. + } return false; } @@ -353,6 +461,26 @@ SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, Error.getFixIts()); } +void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) { + if (!Names2RegClasses.empty()) + return; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) { + const auto *RC = TRI->getRegClass(I); + Names2RegClasses.insert( + std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC)); + } +} + +const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF, + StringRef Name) { + initNames2RegClasses(MF); + auto RegClassInfo = Names2RegClasses.find(Name); + if (RegClassInfo == Names2RegClasses.end()) + return nullptr; + return RegClassInfo->getValue(); +} + MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) : Impl(std::move(Impl)) {} diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 76cbe2994c95..d5cf9244199e 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -15,10 +15,12 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" @@ -40,9 +42,13 @@ public: void print(const MachineFunction &MF); - void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo); - void convert(const Module &M, yaml::MachineBasicBlock &YamlMBB, + void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI); + void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI); + void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB, const MachineBasicBlock &MBB); + void convertStackObjects(yaml::MachineFunction &MF, + const MachineFrameInfo &MFI); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -51,14 +57,14 @@ private: /// This class prints out the machine instructions using the MIR serialization /// format. 
class MIPrinter { - const Module &M; raw_ostream &OS; + ModuleSlotTracker &MST; const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; public: - MIPrinter(const Module &M, raw_ostream &OS, + MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST, const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds) - : M(M), OS(OS), RegisterMaskIds(RegisterMaskIds) {} + : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {} void print(const MachineInstr &MI); void printMBBReference(const MachineBasicBlock &MBB); @@ -84,6 +90,19 @@ template <> struct BlockScalarTraits<Module> { } // end namespace yaml } // end namespace llvm +static void printReg(unsigned Reg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + // TODO: Print Stack Slots. + if (!Reg) + OS << '_'; + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + else if (Reg < TRI->getNumRegs()) + OS << '%' << StringRef(TRI->getName(Reg)).lower(); + else + llvm_unreachable("Can't print this kind of register yet"); +} + void MIRPrinter::print(const MachineFunction &MF) { initRegisterMaskIds(MF); @@ -92,10 +111,12 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasInlineAsm = MF.hasInlineAsm(); - convert(YamlMF, MF.getRegInfo()); + convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); + convert(YamlMF.FrameInfo, *MF.getFrameInfo()); + convertStackObjects(YamlMF, *MF.getFrameInfo()); int I = 0; - const auto &M = *MF.getFunction()->getParent(); + ModuleSlotTracker MST(MF.getFunction()->getParent()); for (const auto &MBB : MF) { // TODO: Allow printing of non sequentially numbered MBBs. // This is currently needed as the basic block references get their index @@ -105,7 +126,7 @@ void MIRPrinter::print(const MachineFunction &MF) { "Can't print MBBs that aren't sequentially numbered"); (void)I; yaml::MachineBasicBlock YamlMBB; - convert(M, YamlMBB, MBB); + convert(MST, YamlMBB, MBB); YamlMF.BasicBlocks.push_back(YamlMBB); } yaml::Output Out(OS); @@ -113,37 +134,120 @@ void MIRPrinter::print(const MachineFunction &MF) { } void MIRPrinter::convert(yaml::MachineFunction &MF, - const MachineRegisterInfo &RegInfo) { + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { MF.IsSSA = RegInfo.isSSA(); MF.TracksRegLiveness = RegInfo.tracksLiveness(); MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled(); + + // Print the virtual register definitions. 
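(The loop that follows emits one entry per virtual register, producing a block roughly like this assumed example:

    registers:
      - { id: 0, class: gr32 }

with the register class name lower-cased to match what MIRParserImpl::getRegClass expects when parsing it back.)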
+ for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + yaml::VirtualRegisterDefinition VReg; + VReg.ID = I; + VReg.Class = + StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + MF.VirtualRegisters.push_back(VReg); + } +} + +void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI, + const MachineFrameInfo &MFI) { + YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken(); + YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken(); + YamlMFI.HasStackMap = MFI.hasStackMap(); + YamlMFI.HasPatchPoint = MFI.hasPatchPoint(); + YamlMFI.StackSize = MFI.getStackSize(); + YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment(); + YamlMFI.MaxAlignment = MFI.getMaxAlignment(); + YamlMFI.AdjustsStack = MFI.adjustsStack(); + YamlMFI.HasCalls = MFI.hasCalls(); + YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize(); + YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); + YamlMFI.HasVAStart = MFI.hasVAStart(); + YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); } -void MIRPrinter::convert(const Module &M, yaml::MachineBasicBlock &YamlMBB, +void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, + const MachineFrameInfo &MFI) { + // Process fixed stack objects. + unsigned ID = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + yaml::FixedMachineStackObject YamlObject; + YamlObject.ID = ID++; + YamlObject.Type = MFI.isSpillSlotObjectIndex(I) + ? yaml::FixedMachineStackObject::SpillSlot + : yaml::FixedMachineStackObject::DefaultType; + YamlObject.Offset = MFI.getObjectOffset(I); + YamlObject.Size = MFI.getObjectSize(I); + YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); + YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); + MF.FixedStackObjects.push_back(YamlObject); + // TODO: Store the mapping between fixed object IDs and object indices to + // print the fixed stack object references correctly. + } + + // Process ordinary stack objects. + ID = 0; + for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + yaml::MachineStackObject YamlObject; + YamlObject.ID = ID++; + YamlObject.Type = MFI.isSpillSlotObjectIndex(I) + ? yaml::MachineStackObject::SpillSlot + : MFI.isVariableSizedObjectIndex(I) + ? yaml::MachineStackObject::VariableSized + : yaml::MachineStackObject::DefaultType; + YamlObject.Offset = MFI.getObjectOffset(I); + YamlObject.Size = MFI.getObjectSize(I); + YamlObject.Alignment = MFI.getObjectAlignment(I); + + MF.StackObjects.push_back(YamlObject); + // TODO: Store the mapping between object IDs and object indices to print + // the stack object references correctly. + } +} + +void MIRPrinter::convert(ModuleSlotTracker &MST, + yaml::MachineBasicBlock &YamlMBB, const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); YamlMBB.ID = (unsigned)MBB.getNumber(); // TODO: Serialize unnamed BB references. if (const auto *BB = MBB.getBasicBlock()) - YamlMBB.Name = BB->hasName() ? BB->getName() : "<unnamed bb>"; + YamlMBB.Name.Value = BB->hasName() ? 
BB->getName() : "<unnamed bb>"; else - YamlMBB.Name = ""; + YamlMBB.Name.Value = ""; YamlMBB.Alignment = MBB.getAlignment(); YamlMBB.AddressTaken = MBB.hasAddressTaken(); YamlMBB.IsLandingPad = MBB.isLandingPad(); for (const auto *SuccMBB : MBB.successors()) { std::string Str; raw_string_ostream StrOS(Str); - MIPrinter(M, StrOS, RegisterMaskIds).printMBBReference(*SuccMBB); + MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB); YamlMBB.Successors.push_back(StrOS.str()); } - + // Print the live in registers. + const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) { + std::string Str; + raw_string_ostream StrOS(Str); + printReg(*I, StrOS, TRI); + YamlMBB.LiveIns.push_back(StrOS.str()); + } // Print the machine instructions. YamlMBB.Instructions.reserve(MBB.size()); std::string Str; for (const auto &MI : MBB) { raw_string_ostream StrOS(Str); - MIPrinter(M, StrOS, RegisterMaskIds).print(MI); + MIPrinter(StrOS, MST, RegisterMaskIds).print(MI); YamlMBB.Instructions.push_back(StrOS.str()); Str.clear(); } @@ -188,18 +292,6 @@ void MIPrinter::print(const MachineInstr &MI) { } } -static void printReg(unsigned Reg, raw_ostream &OS, - const TargetRegisterInfo *TRI) { - // TODO: Print Stack Slots. - // TODO: Print virtual registers. - if (!Reg) - OS << '_'; - else if (Reg < TRI->getNumRegs()) - OS << '%' << StringRef(TRI->getName(Reg)).lower(); - else - llvm_unreachable("Can't print this kind of register yet"); -} - void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { OS << "%bb." << MBB.getNumber(); if (const auto *BB = MBB.getBasicBlock()) { @@ -211,9 +303,19 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { switch (Op.getType()) { case MachineOperand::MO_Register: - // TODO: Print register flags. + // TODO: Print the other register flags. + if (Op.isImplicit()) + OS << (Op.isDef() ? "implicit-def " : "implicit "); + if (Op.isDead()) + OS << "dead "; + if (Op.isKill()) + OS << "killed "; + if (Op.isUndef()) + OS << "undef "; printReg(Op.getReg(), OS, TRI); - // TODO: Print sub register. + // Print the sub register. + if (Op.getSubReg() != 0) + OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); break; case MachineOperand::MO_Immediate: OS << Op.getImm(); @@ -222,10 +324,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { printMBBReference(*Op.getMBB()); break; case MachineOperand::MO_GlobalAddress: - // FIXME: Make this faster - print as operand will create a slot tracker to - // print unnamed values for the whole module every time it's called, which - // is inefficient. - Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, &M); + Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); // TODO: Print offset and target flags. 
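(With the register-flag and subregister handling added in this switch, register operands can now round-trip in forms such as 'implicit-def dead %eflags' or '%0:sub_8bit'; assumed examples matching the printer above and the parser's keyword set.)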
break; case MachineOperand::MO_RegisterMask: { diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 467a2e4eb428..3f04bb0b532b 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -19,8 +19,8 @@ using namespace llvm; namespace llvm { -TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>); -TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); +template class DomTreeNodeBase<MachineBasicBlock>; +template class DominatorTreeBase<MachineBasicBlock>; } char MachineDominatorTree::ID = 0; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 800d1b5bd57d..9856e70edaef 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -74,7 +75,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, if (Fn->hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); - ConstantPool = new (Allocator) MachineConstantPool(TM); + ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. @@ -118,6 +119,10 @@ MachineFunction::~MachineFunction() { } } +const DataLayout &MachineFunction::getDataLayout() const { + return Fn->getParent()->getDataLayout(); +} + /// Get the JumpTableInfo for this function. /// If it does not already exist, allocate one. MachineJumpTableInfo *MachineFunction:: @@ -458,12 +463,12 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getTarget().getDataLayout(); + const DataLayout &DL = getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() : - DL->getPrivateGlobalPrefix(); + const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix() + : DL.getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -472,9 +477,9 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// Return a function-local symbol to represent the PIC base. 
MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getTarget().getDataLayout(); - return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ - Twine(getFunctionNumber())+"$pb"); + const DataLayout &DL = getDataLayout(); + return Ctx.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "$pb"); } //===----------------------------------------------------------------------===// @@ -790,10 +795,6 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } -const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getDataLayout(); -} - Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -851,7 +852,7 @@ MachineConstantPool::~MachineConstantPool() { /// Test whether the given two constants can be allocated the same constant pool /// entry. static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, - const DataLayout *TD) { + const DataLayout &DL) { // Handle the trivial case quickly. if (A == B) return true; @@ -865,8 +866,8 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, return false; // For now, only support constants with the same size. - uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) + uint64_t StoreSize = DL.getTypeStoreSize(A->getType()); + if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128) return false; Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); @@ -877,16 +878,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant *>(A), *TD); + const_cast<Constant *>(A), DL); else if (A->getType() != IntTy) A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant *>(A), *TD); + const_cast<Constant *>(A), DL); if (isa<PointerType>(B->getType())) B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant *>(B), *TD); + const_cast<Constant *>(B), DL); else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant *>(B), *TD); + const_cast<Constant *>(B), DL); return A == B; } @@ -903,8 +904,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. 
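(Here DL is the DataLayout the constant pool was constructed with: MachineFunction now builds its MachineConstantPool from MachineFunction::getDataLayout(), which forwards to the parent Module, instead of reaching through the TargetMachine, and CanShareConstantPoolEntry takes the layout by reference.)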
for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, - getDataLayout())) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 42d0603ab96b..6a206249d834 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -320,7 +320,10 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, const Function *Personality) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.Personality = Personality; + addPersonality(Personality); +} +void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 278a8f24d63e..5984af87a184 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" #include "llvm/Support/raw_os_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +29,6 @@ MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) TracksSubRegLiveness(false) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. @@ -441,3 +441,49 @@ void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { UseMI->getOperand(0).setReg(0U); } } + +static const Function *getCalledFunction(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + const Function *Func = dyn_cast<Function>(MO.getGlobal()); + if (Func != nullptr) + return Func; + } + return nullptr; +} + +static bool isNoReturnDef(const MachineOperand &MO) { + // Anything which is not a noreturn function is a real def. + const MachineInstr &MI = *MO.getParent(); + if (!MI.isCall()) + return false; + const MachineBasicBlock &MBB = *MI.getParent(); + if (!MBB.succ_empty()) + return false; + const MachineFunction &MF = *MBB.getParent(); + // We need to keep correct unwind information even if the function will + // not return, since the runtime may need it. 
+ if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) + return false; + const Function *Called = getCalledFunction(MI); + if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) + || !Called->hasFnAttribute(Attribute::NoUnwind)) + return false; + + return true; +} + +bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { + for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) { + if (isNoReturnDef(MO)) + continue; + return true; + } + } + return false; +} diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f9adba0b35c4..9404c687d410 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -509,18 +509,17 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { << " height.\n"); // Find any MBB predecessors that have MBB as their preferred successor. // They are the only ones that need to be invalidated. - for (MachineBasicBlock::const_pred_iterator - I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { - TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()]; if (!TBI.hasValidHeight()) continue; if (TBI.Succ == MBB) { TBI.invalidateHeight(); - WorkList.push_back(*I); + WorkList.push_back(Pred); continue; } // Verify that TBI.Succ is actually a *I successor. - assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed"); } } while (!WorkList.empty()); } @@ -535,18 +534,17 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { << " depth.\n"); // Find any MBB successors that have MBB as their preferred predecessor. // They are the only ones that need to be invalidated. - for (MachineBasicBlock::const_succ_iterator - I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + for (const MachineBasicBlock *Succ : MBB->successors()) { + TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()]; if (!TBI.hasValidDepth()) continue; if (TBI.Pred == MBB) { TBI.invalidateDepth(); - WorkList.push_back(*I); + WorkList.push_back(Succ); continue; } // Verify that TBI.Pred is actually a *I predecessor. - assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed"); } } while (!WorkList.empty()); } @@ -998,8 +996,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // MBB is the highest precomputed block in the trace. if (MBB) { TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; - for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { - LiveInReg LI = TBI.LiveIns[i]; + for (LiveInReg &LI : TBI.LiveIns) { if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { // For virtual registers, the def latency is included. unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; @@ -1131,11 +1128,16 @@ computeInstrHeights(const MachineBasicBlock *MBB) { MachineTraceMetrics::Trace MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { - // FIXME: Check cache tags, recompute as needed. 
- computeTrace(MBB); - computeInstrDepths(MBB); - computeInstrHeights(MBB); - return Trace(*this, BlockInfo[MBB->getNumber()]); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + + if (!TBI.hasValidDepth() || !TBI.hasValidHeight()) + computeTrace(MBB); + if (!TBI.HasValidInstrDepths) + computeInstrDepths(MBB); + if (!TBI.HasValidInstrHeights) + computeInstrHeights(MBB); + + return Trace(*this, TBI); } unsigned @@ -1204,8 +1206,7 @@ unsigned MachineTraceMetrics::Trace::getResourceLength( unsigned ResourceIdx) ->unsigned { unsigned Cycles = 0; - for (unsigned I = 0; I != Instrs.size(); ++I) { - const MCSchedClassDesc *SC = Instrs[I]; + for (const MCSchedClassDesc *SC : Instrs) { if (!SC->isValid()) continue; for (TargetSchedModel::ProcResIter @@ -1223,8 +1224,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength( for (unsigned K = 0; K != PRDepths.size(); ++K) { unsigned PRCycles = PRDepths[K] + PRHeights[K]; - for (unsigned I = 0; I != Extrablocks.size(); ++I) - PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + for (const MachineBasicBlock *MBB : Extrablocks) + PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K]; PRCycles += extraCycles(ExtraInstrs, K); PRCycles -= extraCycles(RemoveInstrs, K); PRMax = std::max(PRMax, PRCycles); @@ -1235,8 +1236,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength( // Instrs: #instructions in current trace outside current block. unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; // Add instruction count from the extra blocks. - for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) - Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + for (const MachineBasicBlock *MBB : Extrablocks) + Instrs += TE.MTM.getResources(MBB)->InstrCount; Instrs += ExtraInstrs.size(); Instrs -= RemoveInstrs.size(); if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 210a7a1649cd..024d166a4987 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -214,10 +214,10 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), - Impl(nullptr), Initialized(false), DisableVerify(false), - EnableTailMerge(true), EnableShrinkWrap(false) { + : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), + StopAfter(nullptr), Started(true), Stopped(false), + AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), + DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) { Impl = new PassConfigImpl(); @@ -288,6 +288,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { // and shouldn't reference it. AnalysisID PassID = P->getPassID(); + if (StartBefore == PassID) + Started = true; if (Started && !Stopped) { std::string Banner; // Construct banner message before PM->add() as that may delete the pass. @@ -422,7 +424,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
- addPass(createSjLjEHPreparePass(TM)); + addPass(createSjLjEHPreparePass()); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 76583f0de888..b2fdee6c8e4c 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -82,7 +82,8 @@ private: void calculateSets(MachineFunction &Fn); void calculateCallsInformation(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); + void assignCalleeSavedSpillSlots(MachineFunction &Fn, + const BitVector &SavedRegs); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); @@ -92,7 +93,7 @@ private: void insertPrologEpilogCode(MachineFunction &Fn); // Convenience for recognizing return blocks. - bool isReturnBlock(MachineBasicBlock *MBB); + bool isReturnBlock(const MachineBasicBlock *MBB) const; }; } // namespace @@ -127,7 +128,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -bool PEI::isReturnBlock(MachineBasicBlock* MBB) { +bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const { return (MBB && !MBB->empty() && MBB->back().isReturn()); } @@ -143,7 +144,12 @@ void PEI::calculateSets(MachineFunction &Fn) { if (MFI->getSavePoint()) { SaveBlock = MFI->getSavePoint(); assert(MFI->getRestorePoint() && "Both restore and save must be set"); - RestoreBlocks.push_back(MFI->getRestorePoint()); + MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock)) + RestoreBlocks.push_back(RestoreBlock); return; } @@ -178,13 +184,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // instructions. calculateCallsInformation(Fn); - // Allow the target machine to make some adjustments to the function - // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); + // Determine which of the registers in the callee save list should be saved. + BitVector SavedRegs; + TFI->determineCalleeSaves(Fn, SavedRegs, RS); - // Scan the function for modified callee saved registers and insert spill code - // for any callee saved registers that are modified. - calculateCalleeSavedRegisters(Fn); + // Insert spill code for any callee saved registers that are modified. + assignCalleeSavedSpillSlots(Fn, SavedRegs); // Determine placement of CSR spill/restore code: // place all spills in the entry block, all restores in return blocks. @@ -290,39 +295,27 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { } } - -/// calculateCalleeSavedRegisters - Scan the function for modified callee saved -/// registers. -void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { - const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); - const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); - MachineFrameInfo *MFI = F.getFrameInfo(); - - // Get the callee saved register list... - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); - +void PEI::assignCalleeSavedSpillSlots(MachineFunction &F, + const BitVector &SavedRegs) { // These are used to keep track the callee-save area. Initialize them. 
MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; - // Early exit for targets which have no callee saved registers. - if (!CSRegs || CSRegs[0] == 0) + if (SavedRegs.empty()) return; - // In Naked functions we aren't going to save any registers. - if (F.getFunction()->hasFnAttribute(Attribute::Naked)) - return; + const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - // Functions which call __builtin_unwind_init get all their registers saved. - if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) { - // If the reg is modified, save it! + if (SavedRegs.test(Reg)) CSI.push_back(CalleeSavedInfo(Reg)); - } } + const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); + MachineFrameInfo *MFI = F.getFrameInfo(); if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { // If target doesn't implement this, use generic code. @@ -1033,12 +1026,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); - // Make sure MRI now accounts this register as used. - MRI.setPhysRegUsed(ScratchReg); - // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fd3d4d78968b..660bb4f0e34d 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -986,10 +986,6 @@ void RAFast::AllocateBasicBlock() { } } - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - // Track registers defined by instruction - early clobbers and tied uses at // this point. UsedInInstr.clear(); @@ -1050,10 +1046,6 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); Coalesced.push_back(MI); @@ -1103,12 +1095,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Add the clobber lists for all the instructions we skipped earlier. - for (const MCInstrDesc *Desc : SkippedInstrs) - if (const uint16_t *Defs = Desc->getImplicitDefs()) - while (*Defs) - MRI->setPhysRegUsed(*Defs++); - // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. 
MRI->clearVirtRegs(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 26f42c93323a..7ebcf7f54856 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -400,6 +400,8 @@ private: typedef SmallVector<HintInfo, 4> HintsInfo; BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); void collectHintInfo(unsigned, HintsInfo &); + + bool isUnusedCalleeSavedReg(unsigned PhysReg) const; }; } // end anonymous namespace @@ -816,6 +818,16 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, } } +/// Returns true if the given \p PhysReg is a callee saved register and has not +/// been used for allocation yet. +bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { + unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); + if (CSR == 0) + return false; + + return !Matrix->isPhysRegUsed(PhysReg); +} + /// tryEvict - Try to evict all interferences for a physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. @@ -861,13 +873,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, continue; // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low. - if (CostPerUseLimit == 1) - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (!MRI->isPhysRegUsed(CSR)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " - << PrintReg(CSR, TRI) << '\n'); - continue; - } + if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + << '\n'); + continue; + } if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) continue; @@ -1348,9 +1359,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) - continue; + if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) + continue; // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). @@ -2134,7 +2144,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, unsigned ItVirtReg = (*It)->reg; if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); + unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + Matrix->assign(**It, ItPhysReg); } } @@ -2441,16 +2452,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { - // We check other options if we are using a CSR for the first time. - bool CSRFirstUse = false; - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (!MRI->isPhysRegUsed(CSR)) - CSRFirstUse = true; - // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. 
- if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { + if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && + NewVRegs.empty()) { unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 450a3051c6ff..c3786e552a13 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -77,6 +77,16 @@ void RegPressureTracker::dump() const { P.dump(TRI); } +void PressureDiff::dump(const TargetRegisterInfo &TRI) const { + for (const PressureChange &Change : *this) { + if (!Change.isValid() || Change.getUnitInc() == 0) + continue; + dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet()) + << " " << Change.getUnitInc(); + } + dbgs() << '\n'; +} + /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { @@ -787,6 +797,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta Delta2; getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit); if (Delta != Delta2) { + dbgs() << "PDiff: "; + PDiff->dump(*TRI); dbgs() << "DELTA: " << *MI; if (Delta.Excess.isValid()) dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6056d93ddc7a..52d620b1d540 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -443,8 +443,9 @@ namespace { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) - : TLI.getPointerTy(); + auto &DL = DAG.getDataLayout(); + return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) + : TLI.getPointerTy(DL); } /// This method returns true if we are running before type legalization or @@ -456,7 +457,7 @@ namespace { /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; } @@ -3111,7 +3112,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // For big endian targets, we need to add an offset to the pointer // to load the correct bytes. For little endian systems, we merely // need to read fewer bytes from the same pointer. - if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; @@ -6675,7 +6676,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. 
- if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; @@ -6873,7 +6874,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -6926,7 +6927,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), @@ -7093,8 +7094,8 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { !LD2->isVolatile() && DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -7150,13 +7151,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && // Do not remove the cast if the types differ in endian layout. - TLI.hasBigEndianPartOrdering(N0.getValueType()) == - TLI.hasBigEndianPartOrdering(VT) && + TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == + TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -7368,7 +7369,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; i += NumInputsPerOutput) { - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); APInt NewBits = APInt(DstBitSize, 0); bool EltIsUndef = true; for (unsigned j = 0; j != NumInputsPerOutput; ++j) { @@ -7415,7 +7416,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } // For big endian targets, swap the order of the pieces of each element. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } @@ -8373,6 +8374,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (TLI.combineRepeatedFPDivisors(Users.size())) { SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + // FIXME: This optimization requires some level of fast-math, so the + // created reciprocal node should at least have the 'allowReciprocal' + // fast-math-flag set. 
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal @@ -8381,10 +8385,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (Dividend != FPOne) { SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, Reciprocal); - DAG.ReplaceAllUsesWith(U, NewNode.getNode()); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); } } - return SDValue(); + return SDValue(N, 0); // N was replaced. } } @@ -8406,30 +8414,29 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (DAG.getTarget().Options.UnsafeFPMath && - !TLI.isFsqrtCheap()) { - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { - EVT VT = RV.getValueType(); - SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); - AddToWorklist(RV.getNode()); + if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + return SDValue(); - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - SDValue ZeroCmp = - DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), - N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + if (!RV) + return SDValue(); + + EVT VT = RV.getValueType(); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); - RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - DL, VT, ZeroCmp, Zero, RV); - return RV; - } - } - return SDValue(); + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, Zero, RV); } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { @@ -9144,7 +9151,8 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()), AS); + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + VT.getTypeForEVT(*DAG.getContext()), AS); } /// Try turning a load/store into a pre-indexed load/store when the base @@ -9869,8 +9877,7 @@ struct LoadedSlice { /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); - bool IsBigEndian = - DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + bool IsBigEndian = DAG->getDataLayout().isBigEndian(); assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); uint64_t Offset = Shift / 8; unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; @@ -9953,7 +9960,7 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment constraint. 
- unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( ResVT.getTypeForEVT(*DAG->getContext())); if (RequiredAlignment > getAlignment()) @@ -10321,7 +10328,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, unsigned StOffset; unsigned NewAlign = St->getAlignment(); - if (DAG.getTargetLoweringInfo().isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; @@ -10434,12 +10441,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) + if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), @@ -10503,7 +10510,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); + unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -10685,7 +10692,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Construct a single integer constant which is made of the smaller // constant inputs. - bool IsLE = TLI.isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); for (unsigned i = 0; i < NumElem ; ++i) { unsigned Idx = IsLE ? 
(NumElem - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); @@ -10743,7 +10750,7 @@ static bool allowableAlignment(const SelectionDAG &DAG, return true; Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); return (Align >= ABIAlignment); } @@ -11205,8 +11212,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -11265,7 +11272,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); @@ -11514,7 +11522,7 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VecEltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) @@ -11648,7 +11656,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // scalar_to_vector here as well. if (!LegalOperations) { - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } @@ -11825,7 +11833,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!ValidTypes) return SDValue(); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): @@ -12079,10 +12087,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. 
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); - VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecIn1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } else return SDValue(); } @@ -13354,12 +13365,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, const_cast<ConstantFP*>(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); - const DataLayout &TD = *TLI.getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); - SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), - TD.getPrefTypeAlignment(FPTy)); + SDValue CPIdx = + DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), + TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); // Get the offsets to the 0 and 1 element of the array so that we can @@ -13832,6 +13844,15 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are both volatile then they cannot be reordered. if (Op0->isVolatile() && Op1->isVolatile()) return true; + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. These should really be checking the equivalent of mayWrite, + // but it only matters for memory nodes other than load /store. + if (Op0->isInvariant() && Op1->writeMem()) + return false; + + if (Op1->isInvariant() && Op0->writeMem()) + return false; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 5452b1721bb4..2b9ba2c1b534 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -166,7 +166,7 @@ bool FastISel::hasTrivialKill(const Value *V) { } unsigned FastISel::getRegForValue(const Value *V) { - EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; @@ -228,7 +228,7 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { if (!Reg) { // Try to emit the constant by using an integer constant with a cast. const APFloat &Flt = CF->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(DL); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -321,7 +321,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, @@ -493,7 +493,7 @@ bool FastISel::selectGetElementPtr(const User *I) { // FIXME: What's a good SWAG number for MaxOffs? 
uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DL); for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, E = I->op_end(); OI != E; ++OI) { @@ -908,10 +908,10 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // Handle the incoming return values from the call. CLI.clearIns(); SmallVector<EVT, 4> RetTys; - ComputeValueVTs(TLI, CLI.RetTy, RetTys); + ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); bool CanLowerReturn = TLI.CanLowerReturn( CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); @@ -976,7 +976,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // not there, but there are cases it cannot get right. unsigned FrameAlign = Arg.Alignment; if (!FrameAlign) - FrameAlign = TLI.getByValTypeAlignment(ElementTy); + FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); Flags.setByValAlign(FrameAlign); } @@ -1245,8 +1245,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } bool FastISel::selectCast(const User *I, unsigned Opcode) { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) @@ -1288,8 +1288,8 @@ bool FastISel::selectBitCast(const User *I) { } // Bitcasts of other values become reg-reg copies or BITCAST operators. - EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstEVT = TLI.getValueType(I->getType()); + EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(DL, I->getType()); if (SrcEVT == MVT::Other || DstEVT == MVT::Other || !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) // Unhandled type. Halt "fast" selection and bail. @@ -1413,7 +1413,7 @@ bool FastISel::selectFNeg(const User *I) { bool OpRegIsKill = hasTrivialKill(I); // If the target has ISD::FNEG, use it. - EVT VT = TLI.getValueType(I->getType()); + EVT VT = TLI.getValueType(DL, I->getType()); unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg) { @@ -1456,7 +1456,7 @@ bool FastISel::selectExtractValue(const User *U) { // Make sure we only try to handle extracts with a legal result. But also // allow i1 because it's easy. - EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true); if (!RealVT.isSimple()) return false; MVT VT = RealVT.getSimpleVT(); @@ -1480,7 +1480,7 @@ bool FastISel::selectExtractValue(const User *U) { unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, DL, AggTy, AggValueVTs); for (unsigned i = 0; i < VTIndex; i++) ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); @@ -1582,8 +1582,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (DstVT.bitsGT(SrcVT)) return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) @@ -1612,7 +1612,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()), + TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), @@ -2037,7 +2037,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index ecaa2c972719..cc306cbf5ae4 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -90,7 +90,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, + mf.getDataLayout()); CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, Fn->isVarArg(), Outs, Fn->getContext()); @@ -106,9 +107,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (AI->isStaticAlloca()) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. 
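For illustration only (not part of this commit): the FunctionLoweringInfo hunk above sizes a static alloca's frame object by multiplying the DataLayout alloc size of the allocated type by the alloca's array count, and aligns it to the larger of the preferred type alignment and the explicit alignment. A minimal standalone C++ sketch of that arithmetic, with hypothetical names:

#include <algorithm>
#include <cstdint>

// Hypothetical illustration of the frame-object sizing above: TySize is the
// DataLayout alloc size of the allocated type, Count comes from the alloca's
// array-size operand, and the alignment is the max of the preferred type
// alignment and the alloca's own alignment.
struct FrameObject { uint64_t Size; unsigned Align; };

FrameObject sizeStaticAlloca(uint64_t TySize, uint64_t Count,
                             unsigned PrefAlign, unsigned ExplicitAlign) {
  FrameObject FO;
  FO.Align = std::max(PrefAlign, ExplicitAlign); // preferred vs. requested
  FO.Size = TySize * Count;                      // total allocated size
  return FO;
}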
@@ -118,10 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } else { - unsigned Align = std::max( - (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( + AI->getAllocatedType()), + AI->getAlignment()); unsigned StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) @@ -138,7 +139,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(TRI, CS); + TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); for (size_t I = 0, E = Ops.size(); I != E; ++I) { TargetLowering::AsmOperandInfo &Op = Ops[I]; if (Op.Type == InlineAsm::isClobber) { @@ -148,7 +149,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, Op.ConstraintVT); if (PhysReg.first == SP) - MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); } } } @@ -236,7 +237,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, PN->getType(), ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); @@ -366,7 +367,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { @@ -413,7 +414,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { return; SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); assert(ValueVTs.size() == 1 && "PHIs with non-vector integer types should have a single VT."); EVT IntVT = ValueVTs[0]; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 42595cb010c2..5ec10308dc28 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getDataLayout().getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! 
- Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type); + Align = MF->getDataLayout().getTypeAllocSize(Type); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c0d7871bf08b..21ab07234c81 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -65,7 +65,7 @@ class SelectionDAGLegalize { SmallSetVector<SDNode *, 16> *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } // Libcall insertion helpers. @@ -269,7 +269,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { } } - SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + SDValue CPIdx = + DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) { SDValue Result = @@ -331,7 +332,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS)); + SDValue Increment = DAG.getConstant( + RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -385,24 +387,27 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant(NumBits, dl, - TLI.getShiftAmountTy(Val.getValueType())); + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(), + DAG.getDataLayout())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); // Store the two parts SDValue Store1, Store2; - Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, - ST->getPointerInfo(), NewStoredVT, + Store1 = DAG.getTruncStore(Chain, dl, + DAG.getDataLayout().isLittleEndian() ? Lo : Hi, + Ptr, ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS))); + DAG.getConstant(IncrementSize, dl, + TLI.getPointerTy(DAG.getDataLayout(), AS))); Alignment = MinAlign(Alignment, IncrementSize); - Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), - NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment, ST->getAAInfo()); + Store2 = DAG.getTruncStore( + Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, + ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -448,7 +453,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Make sure the stack slot is also aligned for the register type. 
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy()); + SDValue Increment = + DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout())); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -522,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; - if (TLI.isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Alignment, @@ -549,8 +555,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, dl, - TLI.getShiftAmountTy(Hi.getValueType())); + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(), + DAG.getDataLayout())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -581,7 +588,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, EVT VT = Tmp1.getValueType(); EVT EltVT = VT.getVectorElementType(); EVT IdxVT = Tmp3.getValueType(); - EVT PtrVT = TLI.getPointerTy(); + EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -677,7 +684,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); @@ -724,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Align = ST->getAlignment(); if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); if (Align < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } @@ -756,6 +764,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not @@ -782,7 +791,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Lo, Hi; unsigned IncrementSize; - if (TLI.isLittleEndian()) { + if (DL.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. 
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), @@ -795,9 +804,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -806,9 +816,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. - Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, AAInfo); @@ -838,7 +849,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DL.getABITypeAlignment(Ty); if (Align < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } @@ -890,8 +901,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // expand it. if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); if (Align < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); } @@ -995,8 +1005,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; + auto &DL = DAG.getDataLayout(); - if (TLI.isLittleEndian()) { + if (DL.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), @@ -1020,9 +1031,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Hi.getValue(1)); // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Hi.getValueType()))); + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1051,9 +1063,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Hi.getValue(1)); // Move the top bits to the right place. 
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Hi.getValueType()))); + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1086,7 +1099,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Align = LD->getAlignment(); if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); if (Align < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } @@ -1439,7 +1452,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); SDValue NewLoad; @@ -1491,7 +1504,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); @@ -1569,15 +1582,16 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Convert to an integer with the same sign bit. SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { + auto &DL = DAG.getDataLayout(); // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getPointerTy(); + MVT LoadTy = TLI.getPointerTy(DL); // First create a temporary that is aligned for both the load and store. SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); // Then store the float to it. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), false, false, 0); - if (TLI.isBigEndian()) { + if (DL.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), @@ -1599,9 +1613,10 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { (FloatVT.getSizeInBits() - 8 * ByteOffset); assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) - SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, dl, - TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = DAG.getNode( + ISD::SHL, dl, LoadTy, SignBit, + DAG.getConstant(BitShift, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), DL))); } } // Now get the sign bit proper, by seeing whether the value is negative. @@ -1777,9 +1792,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT DestVT, SDLoc dl) { // Create the stack frame object. - unsigned SrcAlign = - TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). 
- getTypeForEVT(*DAG.getContext())); + unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( + SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); @@ -1790,7 +1804,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType); + unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1994,7 +2008,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { } } Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + SDValue CPIdx = + DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -2058,7 +2073,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); @@ -2106,7 +2121,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); @@ -2140,7 +2155,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); @@ -2277,7 +2292,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2389,7 +2404,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2426,7 +2441,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), StackSlot, WordOff); - if (TLI.isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) std::swap(Hi, Lo); // if signed map to unsigned space @@ -2509,8 +2524,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (!isSigned) { SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); - SDValue ShiftConst = - DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType())); + SDValue ShiftConst = DAG.getConstant( + 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = 
DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); @@ -2545,7 +2560,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MVT::i64), ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); - EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); + EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, dl, SHVT)); @@ -2584,11 +2599,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } - if (TLI.isLittleEndian()) FF <<= 32; + if (DAG.getDataLayout().isLittleEndian()) + FF <<= 32; Constant *FudgeFactor = ConstantInt::get( Type::getInt64Ty(*DAG.getContext()), FF); - SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + SDValue CPIdx = + DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); @@ -2699,7 +2716,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// Open code the operations for BSWAP of the specified operation. SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); - EVT SHVT = TLI.getShiftAmountTy(VT); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); @@ -2756,7 +2773,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(VT); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Len = VT.getSizeInBits(); assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && @@ -2814,7 +2831,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(VT); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); @@ -2903,10 +2920,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy()), std::move(Args), 0); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -3002,10 +3021,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", TLI.getPointerTy()), - std::move(Args), 0); + 
CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -3028,7 +3049,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); - EVT ShiftAmountTy = TLI.getShiftAmountTy(VT); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); if (VT.isVector()) ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - @@ -3092,9 +3113,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), - false, false, false, 0); + SDValue VAListLoad = + DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -3111,10 +3132,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(TLI.getDataLayout()-> - getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), - dl, - VAList.getValueType())); + DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*DAG.getContext())), + dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, MachinePointerInfo(V), false, false, 0); @@ -3129,9 +3149,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // output, returning the chain. 
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), MachinePointerInfo(VS), - false, false, false, 0); + Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); @@ -3226,14 +3246,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } unsigned Idx = Mask[i]; if (Idx < NumElems) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Op0, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); else - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Op1, - DAG.getConstant(Idx - NumElems, dl, - TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, + DAG.getConstant(Idx - NumElems, dl, + TLI.getVectorIdxTy(DAG.getDataLayout())))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); @@ -3247,8 +3267,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), - DAG.getConstant(OpTy.getSizeInBits()/2, dl, - TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); + DAG.getConstant(OpTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy( + Node->getOperand(0).getValueType(), + DAG.getDataLayout()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo @@ -3646,8 +3668,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); - SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, - TLI.getShiftAmountTy(HalfType)); + SDValue Shift = + DAG.getConstant(HalfType.getSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); break; @@ -3759,12 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // The high part is obtained by SRA'ing all but one of the bits of low // part. unsigned LoSize = VT.getSizeInBits(); - SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy())); - SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy())); + SDValue HiLHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + SDValue HiRHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. 
This all depends upon WideVT not @@ -3785,8 +3810,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl, - TLI.getShiftAmountTy(BottomHalf.getValueType())); + Tmp1 = DAG.getConstant( + VT.getSizeInBits() - 1, dl, + TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); @@ -3802,9 +3828,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); - Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, - DAG.getConstant(PairTy.getSizeInBits()/2, dl, - TLI.getShiftAmountTy(PairTy))); + Tmp2 = DAG.getNode( + ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } @@ -3828,9 +3855,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); - const DataLayout &TD = *TLI.getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); @@ -3936,7 +3963,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(!TLI.isOperationExpand(ISD::SELECT, VT) && "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " "expanded."); - EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT); + EVT CCVT = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); break; @@ -4036,14 +4064,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Scalars; for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Sh = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -4114,9 +4140,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); - Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, dl, - TLI.getShiftAmountTy(NVT))); + Tmp1 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 
37fdf4453fd4..3c50a4155731 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -218,29 +218,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { unsigned RSize = RVT.getSizeInBits(); // First get the sign bit of second operand. - SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), - DAG.getConstant(RSize - 1, dl, - TLI.getShiftAmountTy(RVT))); + SDValue SignBit = DAG.getNode( + ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, + TLI.getShiftAmountTy(RVT, DAG.getDataLayout()))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); // Shift right or sign-extend it if the two operands have different types. int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { - SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, dl, - TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = + DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); - SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, dl, - TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = + DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); } // Clear the sign bit of the first operand. - SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), - DAG.getConstant(LSize - 1, dl, - TLI.getShiftAmountTy(LVT))); + SDValue Mask = DAG.getNode( + ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, + TLI.getShiftAmountTy(LVT, DAG.getDataLayout()))); Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f41202c4f8a4..9f060a09a0f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -282,7 +282,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { Lo = BitConvertToInteger(Lo); Hi = BitConvertToInteger(Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, @@ -310,8 +310,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT))); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -799,7 +801,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { } // Handle endianness of the load. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts.begin(), Parts.end()); // Assemble the parts in the promoted type. 
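As a side note (illustrative, not part of this commit): the PromoteIntRes_VAARG code around this point loads the value in register-sized parts, reverses them on big-endian targets so the low part comes first, and then shifts each part into position and ORs it into the promoted result, as the following hunk shows. A plain C++ sketch of that assembly under the assumption of 32-bit parts and a 64-bit result, with a hypothetical function name:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical illustration of the parts assembly: reverse the loaded 32-bit
// parts on big-endian targets so the low part comes first, then shift each
// part into position and OR it into the result.
uint64_t assembleParts(std::vector<uint32_t> Parts, bool IsBigEndian) {
  assert(Parts.size() <= 2 && "sketch assumes at most a 64-bit result");
  if (IsBigEndian)
    std::reverse(Parts.begin(), Parts.end()); // low part must come first
  uint64_t Res = 0;
  for (size_t i = 0; i != Parts.size(); ++i)
    Res |= uint64_t(Parts[i]) << (i * 32);    // shift into position and OR in
  return Res;
}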
@@ -809,8 +811,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); // Shift it to the right position and "or" it in. Part = DAG.getNode(ISD::SHL, dl, NVT, Part, - DAG.getConstant(i*RegVT.getSizeInBits(), dl, - TLI.getPointerTy())); + DAG.getConstant(i * RegVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); } @@ -1004,7 +1006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, DAG.getConstant(OVT.getSizeInBits(), dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); } @@ -1063,7 +1065,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, // Promote the index. SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), - TLI.getVectorIdxTy()); + TLI.getVectorIdxTy(DAG.getDataLayout())); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -1356,9 +1358,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, Node, false); } -/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// N is a shift by a value that needs to be expanded, /// and the shift amount is a constant 'Amt'. Expand the operation. -void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, +void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts @@ -1379,9 +1381,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { - if (Amt > VTBits) { + if (Amt.ugt(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); - } else if (Amt > NVTBits) { + } else if (Amt.ugt(NVTBits)) { Lo = DAG.getConstant(0, DL, NVT); Hi = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); @@ -1403,16 +1405,15 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(NVTBits - Amt, DL, ShTy))); + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); } return; } if (N->getOpcode() == ISD::SRL) { - if (Amt > VTBits) { - Lo = DAG.getConstant(0, DL, NVT); - Hi = DAG.getConstant(0, DL, NVT); - } else if (Amt > NVTBits) { + if (Amt.ugt(VTBits)) { + Lo = Hi = DAG.getConstant(0, DL, NVT); + } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); Hi = DAG.getConstant(0, DL, NVT); @@ -1424,19 +1425,19 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits - Amt, DL, ShTy))); + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } return; } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); - if (Amt > VTBits) { + if (Amt.ugt(VTBits)) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits - 1, DL, ShTy)); - } else if (Amt > NVTBits) { + } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(Amt-NVTBits, DL, ShTy)); + DAG.getConstant(Amt - 
NVTBits, DL, ShTy)); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt == NVTBits) { @@ -1448,7 +1449,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits - Amt, DL, ShTy))); + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } } @@ -1808,7 +1809,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy())); + DAG.getConstant(NVTBits - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); } } @@ -1975,7 +1977,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // lo part. unsigned LoSize = Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); } else if (ExtType == ISD::ZEXTLOAD) { // The high part is just a zero. Hi = DAG.getConstant(0, dl, NVT); @@ -1984,7 +1987,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // The high part is undefined. Hi = DAG.getUNDEF(NVT); } - } else if (TLI.isLittleEndian()) { + } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, @@ -2039,15 +2042,16 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (ExcessBits < NVT.getSizeInBits()) { // Transfer low bits from the bottom of Hi to the top of Lo. - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(ExcessBits, dl, - TLI.getPointerTy()))); + Lo = DAG.getNode( + ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); // Move high bits to the right position in Hi. - Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, - NVT, Hi, + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, + Hi, DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); } } @@ -2173,7 +2177,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // If we can emit an efficient shift operation, do so now. Check to see if // the RHS is a constant. if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) - return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi); // If we can determine that the high bit of the shift is zero or one, even if // the low bits are variable, emit this shift in an optimized form. @@ -2206,7 +2210,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // have an illegal type. Fix that first by casting the operand, otherwise // the new SHL_PARTS operation would need further legalization. 
SDValue ShiftOp = N->getOperand(1); - EVT ShiftTy = TLI.getShiftAmountTy(VT); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); assert(ShiftTy.getScalarType().getSizeInBits() >= Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && "ShiftAmountTy is too small to cover the range of this type!"); @@ -2276,8 +2280,9 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); // The high part is obtained by SRA'ing all but one of the bits of low part. unsigned LoSize = NVT.getSizeInBits(); - Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); + Hi = DAG.getNode( + ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2312,7 +2317,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); } else { // For example, extension of an i48 to an i64. Leave the low part alone, // sext_inreg the high part. @@ -2355,10 +2360,10 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); - Hi = DAG.getNode(ISD::SRL, dl, - N->getOperand(0).getValueType(), N->getOperand(0), + Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), + N->getOperand(0), DAG.getConstant(NVT.getSizeInBits(), dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } @@ -2414,7 +2419,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, } Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); - EVT PtrVT = TLI.getPointerTy(); + EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); // Replace this with a libcall that will check overflow. @@ -2845,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Alignment, AAInfo); } - if (TLI.isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); @@ -2882,11 +2887,12 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Transfer high bits from the top of Lo to the bottom of Hi. Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, - TLI.getPointerTy())); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, dl, - TLI.getPointerTy()))); + TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); } // Store both the high bits and maybe some of the low bits. @@ -2956,14 +2962,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. 
- SDValue FudgePtr = DAG.getConstantPool( - ConstantInt::get(*DAG.getContext(), FF.zext(64)), - TLI.getPointerTy()); + SDValue FudgePtr = + DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)), + TLI.getPointerTy(DAG.getDataLayout())); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); - if (TLI.isBigEndian()) std::swap(Zero, Four); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Zero, Four); SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); @@ -3113,9 +3120,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0; i < NumOperands; ++i) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getConstant(j, dl, - TLI.getVectorIdxTy())); + SDValue Ext = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -3142,7 +3149,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, + TLI.getVectorIdxTy(DAG.getDataLayout())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3179,8 +3187,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 9c297698c1db..a7392fabf1e7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1006,7 +1006,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout())); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. 
@@ -1030,7 +1030,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, DAG.getConstant(LVT.getSizeInBits(), dlHi, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } @@ -1079,7 +1079,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); @@ -1117,7 +1117,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, DAG.getConstant(LoVT.getSizeInBits(), dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2f2778982611..d1131a74cf17 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,7 +73,7 @@ private: } EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } /// IgnoreNodeResults - Pretend all of this node's results are legal. @@ -167,7 +167,7 @@ private: SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -347,7 +347,7 @@ private: void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandShiftByConstant(SDNode *N, unsigned Amt, + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 330c31ce0eec..14d8f7762086 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -60,18 +60,20 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeExpandInteger: - case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeExpandFloat: { + auto &DL = DAG.getDataLayout(); // Convert the expanded pieces of the input. 
GetExpandedOp(InOp, Lo, Hi); - if (TLI.hasBigEndianPartOrdering(InVT) != - TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(InVT, DL) != + TLI.hasBigEndianPartOrdering(OutVT, DL)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -88,7 +90,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -119,9 +121,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) - Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getConstant(i, dl, - TLI.getVectorIdxTy()))); + Vals.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -131,7 +133,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHS = Vals[Slot]; SDValue RHS = Vals[Slot + 1]; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(LHS, RHS); Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, @@ -143,7 +145,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = Vals[Slot++]; Hi = Vals[Slot++]; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); return; @@ -155,9 +157,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. - unsigned Alignment = - TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. - getTypeForEVT(*DAG.getContext())); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment( + NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); @@ -182,7 +183,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. - if (TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) std::swap(Lo, Hi); } @@ -241,7 +242,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getConstant(1, dl, Idx.getValueType())); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); } @@ -282,7 +283,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. 
- if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -302,7 +303,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.hasBigEndianPartOrdering(OVT)) + if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -325,7 +326,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, if (NumElements > 1) { NumElements >>= 1; SplitInteger(Op, Parts[0], Parts[1]); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Parts[0], Parts[1]); IntegerToVector(Parts[0], NumElements, Ops, EltVT); IntegerToVector(Parts[1], NumElements, Ops, EltVT); @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) { SDValue Lo, Hi; GetExpandedOp(N->getOperand(i), Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); NewElts.push_back(Lo); NewElts.push_back(Hi); @@ -431,7 +432,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { SDValue Lo, Hi; GetExpandedOp(Val, Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); SDValue Idx = N->getOperand(2); @@ -481,7 +482,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ee844a8a4c58..83d4ad5ea1f4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -503,7 +503,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { // Instead, we load all significant words, mask bits off, and concatenate // them to form each element. Finally, they are extended to destination // scalar type to build the destination vector. 
- EVT WideVT = TLI.getPointerTy(); + EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); assert(WideVT.isRound() && "Could not handle the sophisticated case when the widest integer is" @@ -563,7 +563,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue Lo, Hi, ShAmt; if (BitOffset < WideBits) { - ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant( + BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); } @@ -573,8 +574,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { WideIdx++; BitOffset -= WideBits; if (BitOffset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl, - TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant( + SrcEltBits - BitOffset, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); } @@ -592,8 +594,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); break; case ISD::SEXTLOAD: - ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl, - TLI.getShiftAmountTy(WideVT)); + ShAmt = + DAG.getConstant(WideBits - SrcEltBits, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); @@ -663,8 +666,9 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // and save them into memory individually. SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, @@ -803,7 +807,7 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { // Place the extended lanes into the correct locations. int ExtLaneScale = NumSrcElements / NumElements; - int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; for (int i = 0; i < NumElements; ++i) ShuffleMask[i * ExtLaneScale + EndianOffset] = i; @@ -858,7 +862,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { ShuffleMask.push_back(i); int ExtLaneScale = NumSrcElements / NumElements; - int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + int EndianOffset = DAG.getDataLayout().isBigEndian() ? 
ExtLaneScale - 1 : 0; for (int i = 0; i < NumElements; ++i) ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; @@ -995,12 +999,15 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SDLoc dl(Op); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { - SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); - SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Ops[i] = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), + TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getConstant(APInt::getAllOnesValue diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 905492c202ca..4348ab79f7d1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -251,8 +251,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { Op = GetScalarizedVector(Op); } else { EVT VT = OpVT.getVectorElementType(); - Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + Op = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -384,10 +385,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { RHS = GetScalarizedVector(RHS); } else { EVT VT = OpVT.getVectorElementType(); - LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); - RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + LHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + RHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Turn it into a scalar SETCC. @@ -742,7 +745,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // expanded pieces. if (LoVT == HiVT) { GetExpandedOp(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); @@ -761,12 +764,12 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // In the general case, convert the input to an integer and split it by hand. 
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(LoIntVT, HiIntVT); SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); @@ -819,7 +822,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, - TLI.getVectorIdxTy())); + TLI.getVectorIdxTy(DAG.getDataLayout()))); } void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -840,7 +843,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Store the new subvector into the specified index. SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), false, false, 0); @@ -898,9 +901,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Lo.getValueType(), Lo, Elt, Idx); else - Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, dl, - TLI.getVectorIdxTy())); + Hi = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getConstant(IdxVal - LoNumElts, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); return; } @@ -919,8 +923,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = - TLI.getDataLayout()->getPrefTypeAlignment(VecType); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); @@ -1292,10 +1295,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, Idx -= Input * NewElts; // Extract the vector element by hand. - SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy()))); + SVOps.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); } // Construct the Lo/Hi output using a BUILD_VECTOR. 
@@ -1472,7 +1474,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { Lo = BitConvertToInteger(Lo); Hi = BitConvertToInteger(Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), @@ -1763,9 +1765,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (const SDValue &Op : N->op_values()) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { - Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); - + Elts.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); } } @@ -1829,10 +1831,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. - return IsFloat ? - DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, - DAG.getTargetConstant(0, DL, TLI.getPointerTy())) : - DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return IsFloat + ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant( + 0, DL, TLI.getPointerTy(DAG.getDataLayout()))) + : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { @@ -2062,12 +2065,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -2079,14 +2082,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -2123,9 +2124,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { SDValue VecOp = DAG.getUNDEF(NextVT); unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], + 
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -2211,8 +2212,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + SDValue InVal = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -2226,8 +2228,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) { - SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy())); + SDValue Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -2453,8 +2456,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (InputWidened) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy())); + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2511,8 +2515,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. 
- InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + InOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2527,8 +2532,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) { - SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + SDValue ExtVal = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2570,8 +2576,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); unsigned i; for (i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy())); + Ops[i] = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal + i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2872,12 +2880,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && "We can't have the same type as we started with!"); if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) - InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, - DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + InOp = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); else - InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + InOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); break; } } @@ -2920,10 +2929,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); SmallVector<SDValue, 16> Ops(NumElts); for (unsigned i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(Opcode, dl, EltVT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, - TLI.getVectorIdxTy()))); + Ops[i] = DAG.getNode( + Opcode, dl, EltVT, + DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2943,8 +2953,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -2971,8 +2982,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy())); + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -3053,7 +3065,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Get a new SETCC node to compare the newly widened operands. // Only some of the compared elements are legal. - EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType()); + EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + InOp0.getValueType()); SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), SVT, InOp0, InOp1, N->getOperand(2)); @@ -3061,9 +3074,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); - SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + SDValue CC = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -3159,8 +3172,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -3407,9 +3421,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, if (NewVT.isVector()) { unsigned NumVTElts = NewVT.getVectorNumElements(); do { - SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3429,8 +3443,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, // Readjust index position based on new vector type Idx = Idx * ValEltWidth / NewVTWidth; do { - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, + DAG.getConstant(Idx++, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3476,8 +3492,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), 
StEltVT, isVolatile, isNonTemporal, Align, @@ -3488,8 +3505,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, BasePtr, DAG.getConstant(Offset, dl, BasePtr.getValueType())); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -3525,8 +3543,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -3534,8 +3553,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) - Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); + Ops[Idx] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 00cbae3986cd..34e1a7001082 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -725,9 +725,8 @@ void ScheduleDAGLinearize::Schedule() { SmallVector<SDNode*, 8> Glues; unsigned DAGSize = 0; - for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), - E = DAG->allnodes_end(); I != E; ++I) { - SDNode *N = I; + for (SDNode &Node : DAG->allnodes()) { + SDNode *N = &Node; // Use node id to record degree. unsigned Degree = N->use_size(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b22d6edd85af..2a6c853a1d11 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -289,9 +289,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { /// ClusterNodes - Cluster certain nodes which should be scheduled together. /// void ScheduleDAGSDNodes::ClusterNodes() { - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - SDNode *Node = &*NI; + for (SDNode &NI : DAG->allnodes()) { + SDNode *Node = &NI; if (!Node || !Node->isMachineOpcode()) continue; @@ -308,9 +307,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // to their associated SUnits by holding SUnits table indices. A value // of -1 means the SDNode does not yet have an associated SUnit. 
unsigned NumNodes = 0; - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - NI->setNodeId(-1); + for (SDNode &NI : DAG->allnodes()) { + NI.setNodeId(-1); ++NumNodes; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index be5478275f99..14f44ccc60ce 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -151,8 +151,8 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; bool IsAllUndef = true; - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) + for (const SDValue &Op : N->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) continue; IsAllUndef = false; // Do not accept build_vectors that aren't all constants or which have non-0 @@ -163,12 +163,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // We only want to check enough bits to cover the vector elements, because // we care if the resultant vector is all zeros, not whether the individual // constants are. - SDValue Zero = N->getOperand(i); unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { if (CN->getAPIntValue().countTrailingZeros() < EltSize) return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) { if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) return false; } else @@ -921,7 +920,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI->getDataLayout()->getABITypeAlignment(Ty); + return getDataLayout().getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... @@ -1184,7 +1183,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, // EltParts is currently in little endian order. If we actually want // big-endian order then reverse it now. - if (TLI->isBigEndian()) + if (getDataLayout().isBigEndian()) std::reverse(EltParts.begin(), EltParts.end()); // The elements must be reversed when the element order is different @@ -1234,7 +1233,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { - return getConstant(Val, DL, TLI->getPointerTy(), isTarget); + return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); } SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, @@ -1303,7 +1302,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. 
- unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); + unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1373,7 +1372,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1400,7 +1399,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1850,7 +1849,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI->getShiftAmountTy(LHSTy); + EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1864,10 +1863,10 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1877,13 +1876,13 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI->getDataLayout(); - unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), - TD->getPrefTypeAlignment(Ty2)); + const DataLayout &DL = getDataLayout(); + unsigned Align = + std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, @@ -1916,9 +1915,9 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, break; } - if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { const APInt &C2 = N2C->getAPIntValue(); - if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { const APInt &C1 = N1C->getAPIntValue(); switch (Cond) { @@ -1936,8 +1935,8 @@ SDValue 
SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } } - if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { - if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) { APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); switch (Cond) { default: break; @@ -2356,15 +2355,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); + unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); - KnownZeroOut = std::min(KnownZeroOut, + KnownZeroHigh = std::min(KnownZeroHigh, + KnownZero2.countLeadingOnes()); + KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); if (Op.getOpcode() == ISD::ADD) { - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); + if (KnownZeroHigh > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); break; } @@ -2372,8 +2380,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // information if we know (at least) that the low two bits are clear. We // then return to the caller that the low bit is unknown but that other bits // are known zero. - if (KnownZeroOut >= 2) // ADDE - KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); + if (KnownZeroLow >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow); break; } case ISD::SREM: @@ -2814,7 +2822,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // doesn't create new constants with different values. Nevertheless, the // opaque flag is preserved during folding to prevent future folding with // other constants. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) { const APInt &Val = C->getAPIntValue(); switch (Opcode) { default: break; @@ -2861,7 +2869,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } // Constant fold unary operations with a floating point constant operand. - if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) { APFloat V = C->getValueAPF(); // make copy switch (Opcode) { case ISD::FNEG: @@ -2922,7 +2930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } // Constant fold unary operations with a vector integer or float operand. 
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) { if (BV->isConstant()) { switch (Opcode) { default: @@ -3278,8 +3286,8 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); switch (Opcode) { default: break; case ISD::TokenFactor: @@ -3499,7 +3507,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, Ops.push_back(Op); continue; } - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { APInt Val = C->getAPIntValue(); Ops.push_back(SignExtendInReg(Val)); continue; @@ -3554,7 +3562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // if the indices are known different, extract the element from // the original vector. SDValue N1Op2 = N1.getOperand(2); - ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode()); + ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2); if (N1Op2C && N2C) { if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { @@ -3600,9 +3608,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); - if (isa<ConstantSDNode>(Index.getNode())) { + if (isa<ConstantSDNode>(Index)) { assert((VT.getVectorNumElements() + - cast<ConstantSDNode>(Index.getNode())->getZExtValue() + cast<ConstantSDNode>(Index)->getZExtValue() <= N1.getValueType().getVectorNumElements()) && "Extract subvector overflow!"); } @@ -3628,8 +3636,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // Constant fold FP operations. bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); - ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { // Canonicalize constant to RHS if commutative. @@ -3787,7 +3795,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); switch (Opcode) { case ISD::FMA: { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -3845,9 +3853,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, "Dest and insert subvector source types must match!"); assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); - if (isa<ConstantSDNode>(Index.getNode())) { + if (isa<ConstantSDNode>(Index)) { assert((N2.getValueType().getVectorNumElements() + - cast<ConstantSDNode>(Index.getNode())->getZExtValue() + cast<ConstantSDNode>(Index)->getZExtValue() <= VT.getVectorNumElements()) && "Insert subvector overflow!"); } @@ -3994,7 +4002,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); APInt Val(NumVTBits, 0); - if (TLI.isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; } else { @@ -4066,9 +4074,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT == MVT::Other) { unsigned AS = 0; - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || + if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) || TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { - VT = TLI.getPointerTy(); + VT = TLI.getPointerTy(DAG.getDataLayout()); } else { switch (DstAlign & 7) { case 0: VT = MVT::i64; break; @@ -4185,14 +4193,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) + while (NewAlign > Align && + DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4294,7 +4302,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -4385,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -4488,19 +4496,21 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult() - .setTailCall(isTailCall); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4544,19 +4554,21 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult() - .setTailCall(isTailCall); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4594,7 +4606,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } // Emit a library call. - Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); + Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4608,13 +4620,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult() - .setTailCall(isTailCall); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -6656,7 +6670,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { /// isOnlyUserOf - Return true if this node is the only use of N. 
/// -bool SDNode::isOnlyUserOf(SDNode *N) const { +bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDNode *User = *I; @@ -6671,16 +6685,16 @@ bool SDNode::isOnlyUserOf(SDNode *N) const { /// isOperand - Return true if this node is an operand of N. /// -bool SDValue::isOperandOf(SDNode *N) const { +bool SDValue::isOperandOf(const SDNode *N) const { for (const SDValue &Op : N->op_values()) if (*this == Op) return true; return false; } -bool SDNode::isOperandOf(SDNode *N) const { - for (unsigned i = 0, e = N->NumOperands; i != e; ++i) - if (this == N->OperandList[i].getNode()) +bool SDNode::isOperandOf(const SDNode *N) const { + for (const SDValue &Op : N->op_values()) + if (this == Op.getNode()) return true; return false; } @@ -6784,10 +6798,9 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { if (OperandVT.isVector()) { // A vector operand; extract a single element. EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, - OperandEltVT, - Operand, - getConstant(i, dl, TLI->getVectorIdxTy())); + Operands[j] = + getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, + getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6891,10 +6904,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, - *TLI->getDataLayout()); + getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6950,10 +6963,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, "More vector elements requested than available!"); SDValue Lo, Hi; Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, DL, TLI->getVectorIdxTy())); + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, getConstant(LoVT.getVectorNumElements(), DL, - TLI->getVectorIdxTy())); + TLI->getVectorIdxTy(getDataLayout()))); return std::make_pair(Lo, Hi); } @@ -6965,7 +6978,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op, Count = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - EVT IdxTy = TLI->getVectorIdxTy(); + EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, @@ -7080,14 +7093,12 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { ConstantSDNode * BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { - return dyn_cast_or_null<ConstantSDNode>( - getSplatValue(UndefElements).getNode()); + return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); } ConstantFPSDNode * BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { - return dyn_cast_or_null<ConstantFPSDNode>( - getSplatValue(UndefElements).getNode()); + return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } bool BuildVectorSDNode::isConstant() const { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4897082f89aa..2c3c0eb101a0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -146,7 +146,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); @@ -160,13 +160,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // Combine the round and odd parts. Lo = Val; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); - Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, - TLI.getPointerTy())); + Hi = + DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } @@ -177,7 +178,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -211,8 +212,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) - return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, DL, TLI.getPointerTy())); + return DAG.getNode( + ISD::FP_ROUND, DL, ValueVT, Val, + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -305,8 +307,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Vector/Vector bitcast. @@ -362,10 +365,10 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; - assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && + "Copying to an illegal type!"); if (NumParts == 0) return; @@ -433,7 +436,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); @@ -468,7 +471,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, } } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } @@ -497,9 +500,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, // undef elements. SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, DL, - TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -524,9 +527,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -554,14 +557,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, - IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), DL, - TLI.getVectorIdxTy())); + Ops[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); else - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, - DAG.getConstant(i, DL, TLI.getVectorIdxTy())); + Ops[i] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Split the intermediate operands into legal parts. @@ -588,14 +591,14 @@ RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} -RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty) { + ComputeValueVTs(TLI, DL, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); @@ -796,7 +799,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. 
assert(DAG.getMachineFunction().getFrameInfo()-> - hasInlineAsmWithSPAdjust()); + hasOpaqueSPAdjustment()); } } } @@ -807,7 +810,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -964,8 +967,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - Ty); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, Result); @@ -1031,7 +1034,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, getCurSDLoc(), VT); @@ -1041,7 +1044,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout(), AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) @@ -1095,7 +1099,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1127,7 +1131,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = + TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1147,13 +1152,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, + TLI.getPointerTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, + Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1163,6 +1170,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1175,7 +1183,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1183,7 +1191,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); @@ -1203,7 +1211,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1692,7 +1700,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1723,9 +1731,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + unsigned JumpTableReg = + FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1733,11 +1742,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. 
- SDValue CMP = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), - ISD::SETUGT); + SDValue CMP = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, @@ -1762,7 +1770,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // First create the loads to the guard/stack slot for the comparison. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); @@ -1771,8 +1779,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue GuardPtr = getValue(IRGuard); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - unsigned Align = - TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; SDLoc dl = getCurSDLoc(); @@ -1799,10 +1806,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); - SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. @@ -1848,10 +1855,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeCmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1867,7 +1874,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); + VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } @@ -1909,13 +1916,15 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), + ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. 
Cmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, @@ -1924,8 +1933,9 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, dl, VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); - Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, dl, VT), ISD::SETNE); + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -2013,7 +2023,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { SmallVector<EVT, 2> ValueVTs; SDLoc dl = getCurSDLoc(); - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been @@ -2022,14 +2032,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + FuncInfo.ExceptionPointerVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[0]); } else { - Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); } Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, - FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), + FuncInfo.ExceptionSelectorVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[1]); // Merge into one. @@ -2038,28 +2050,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadBB) { - SDValue Chain = getControlRoot(); - SDLoc dl = getCurSDLoc(); - - // Get the typeid that we will dispatch on later. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); - - // Branch to the main landing pad block. 
- MachineBasicBlock *ClauseMBB = FuncInfo.MBB; - ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, - DAG.getBasicBlock(LPadBB))); - return VReg; -} - void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { #ifndef NDEBUG for (const CaseCluster &CC : Clusters) @@ -2186,8 +2176,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = - DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( + Op2.getValueType(), DAG.getDataLayout()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2256,7 +2246,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2271,13 +2262,15 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2336,7 +2329,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2344,7 +2338,8 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2352,7 +2347,8 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2361,43 +2357,49 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, - DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); + DAG.getTargetConstant( + 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2405,7 +2407,8 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -2413,14 +2416,16 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -2442,7 +2447,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); @@ -2457,19 +2462,21 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), - getCurSDLoc(), TLI.getVectorIdxTy()); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InVal, InIdx)); + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), - getCurSDLoc(), TLI.getVectorIdxTy()); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2492,7 +2499,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { unsigned MaskNumElts = Mask.size(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2614,7 +2621,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDLoc dl = getCurSDLoc(); Src = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Src, - DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + DAG.getConstant(StartIdx[Input], dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -2641,7 +2649,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. 
EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI.getVectorIdxTy(); + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { @@ -2676,9 +2684,9 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -2722,7 +2730,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -2755,6 +2763,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = I.getType()->isVectorTy() ? + cast<VectorType>(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector<SDValue, 16> Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; @@ -2770,16 +2788,25 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); - MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); + MVT PtrTy = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); - // If this is a constant subscript, handle it quickly. - if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *CI = dyn_cast<ConstantInt>(Idx); + if (!CI && isa<ConstantDataVector>(Idx) && + cast<ConstantDataVector>(Idx)->getSplatValue()) + CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); + + if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2787,6 +2814,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } // If the index is smaller or larger than intptr_t, truncate or extend // it. 
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -2823,14 +2855,14 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -2898,7 +2930,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2975,8 +3007,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), - ValueVTs, &Offsets); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3077,9 +3109,10 @@ static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, else if (SDB->findValue(ShuffleInst)) { SDValue ShuffleNode = SDB->getValue(ShuffleInst); SDLoc sdl = ShuffleNode; - Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + Base = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDB->setValue(Ptr, Base); } else @@ -3126,7 +3159,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; @@ -3146,7 +3179,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3184,7 +3217,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3214,7 +3247,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Alignment, AAInfo, Ranges); if (!UniformBase) 
{ - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { Root, Src0, Mask, Base, Index }; @@ -3291,8 +3324,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, + TLI.getPointerTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + TLI.getPointerTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3304,7 +3339,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3339,7 +3374,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + EVT VT = + TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); @@ -3382,7 +3418,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. 
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3391,7 +3427,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3425,7 +3461,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI.getValueType(PTy); + EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -3458,8 +3494,9 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); - SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, dl, TLI.getPointerTy())); + SDValue t1 = DAG.getNode( + ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); @@ -3484,7 +3521,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -4071,11 +4109,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { @@ -4083,7 +4123,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Res = DAG.getNode(ISD::READ_REGISTER, sdl, DAG.getVTList(VT, MVT::Other), Chain, RegName); setValue(&I, Res); @@ -4335,14 +4375,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDValue Offset = DAG.getNode(ISD::ADD, sdl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, sdl, TLI.getPointerTy())); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); setValue(&I, 
DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4444,7 +4485,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, sdl, MVT::i32), @@ -4474,7 +4515,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -4564,7 +4605,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, @@ -4593,10 +4634,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: - setValue(&I, - DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), - DAG.getNode(ISD::BITCAST, sdl, MVT::f16, - getValue(I.getArgOperand(0))))); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); @@ -4640,8 +4681,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); + Res = DAG.getNode( + ISD::STACKSAVE, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; @@ -4655,7 +4697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); @@ -4753,7 +4795,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI.getPointerTy(), + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; } @@ -4794,10 +4836,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()) - .setCallee(CallingConv::C, I.getType(), - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( + CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -4873,7 +4916,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = + DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -4883,7 +4927,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::invariant_end: // Discard region information. @@ -4903,7 +4947,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::donothing: // ignore @@ -4935,11 +4979,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameescape: { + case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. 
for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); @@ -4953,7 +4997,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) + TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } @@ -4961,10 +5005,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) + case Intrinsic::localrecover: { + // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); - MVT PtrVT = TLI.getPointerTy(0); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); @@ -4978,7 +5022,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. Value *FP = I.getArgOperand(1); @@ -4994,7 +5038,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_exceptioncode: { unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "cannot get exception code on this platform"); - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); @@ -5178,7 +5222,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { - EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); + EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else @@ -5203,7 +5248,8 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { - EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -5640,8 +5686,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, - DAG.getTargetLoweringInfo().getPointerTy()); + Callee = DAG.getExternalSymbol( + RenameFn, + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5670,13 +5717,12 @@ public: /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. 
If there is no Value* for this operand, it returns /// MVT::Other. - EVT getCallOperandValEVT(LLVMContext &Context, - const TargetLowering &TLI, - const DataLayout *DL) const { + EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL) const { if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) - return TLI.getPointerTy(); + return TLI.getPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); @@ -5698,7 +5744,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = DL->getTypeSizeInBits(OpTy); + unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -5712,7 +5758,7 @@ public: } } - return TLI.getValueType(OpTy, true); + return TLI.getValueType(DL, OpTy, true); } }; @@ -5838,8 +5884,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -5864,10 +5910,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), + STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; break; @@ -5888,8 +5935,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = - OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -5977,17 +6024,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { - OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI.getPointerTy()); + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. 
Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + SDValue StackSlot = + DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -6022,9 +6071,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // AsmNodeOperands - The operands for the ISD::INLINEASM node. std::vector<SDValue> AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain - AsmNodeOperands.push_back( - DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( + IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6064,8 +6112,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6201,8 +6249,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6227,16 +6275,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); unsigned ConstraintID = @@ -6314,7 +6362,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6380,9 +6428,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), - getRoot(), getValue(I.getOperand(0)), + const DataLayout &DL = DAG.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), + getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); setValue(&I, V); @@ -6473,8 +6521,8 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + Ops.push_back(Builder.DAG.getTargetFrameIndex( + FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -6654,7 +6702,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 3> ValueVTs; - ComputeValueVTs(TLI, CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -6718,10 +6766,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + auto &DL = CLI.DAG.getDataLayout(); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), @@ -6733,13 +6782,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // FIXME: equivalent assert? 
// assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); - uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); - unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); + unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; @@ -6784,7 +6833,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); Type *FinalType = Args[i].Ty; if (Args[i].isByVal) FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); @@ -6797,7 +6846,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6821,14 +6870,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; else - FrameAlign = getByValTypeAlignment(ElementTy); + FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (Args[i].isNest) @@ -6923,7 +6972,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SmallVector<EVT, 1> PVTs; Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); - ComputeValueVTs(*this, PtrRetTy, PVTs); + ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; @@ -6997,7 +7046,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType()); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -7030,13 +7080,14 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const DataLayout *DL = TLI->getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -7053,7 +7104,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; Type *FinalType = I->getType(); @@ -7066,7 +7117,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7090,14 +7141,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI->getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) @@ -7153,7 +7204,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; @@ -7177,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate @@ -7324,7 +7376,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector<EVT, 4> ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -7595,7 +7647,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { // FIXME: Using the pointer type doesn't seem ideal. 
- uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } @@ -7650,8 +7702,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt LowBound; APInt CmpRange; - const int BitWidth = - DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + const int BitWidth = DAG.getTargetLoweringInfo() + .getPointerTy(DAG.getDataLayout()) + .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); if (Low.isNonNegative() && High.slt(BitWidth)) { @@ -7731,7 +7784,7 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f225d54d189d..700675453fe7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -755,8 +755,6 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadMBB); private: // These all get lowered before this pass. @@ -915,8 +913,8 @@ struct RegsForValue { RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt); - RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, - Type *Ty); + RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty); /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index ef468a2b1c54..5b9b18286fae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,7 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; + case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 31f8210f40f0..97ece8b9248a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -921,7 +921,8 @@ void SelectionDAGISel::DoInstructionSelection() { bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + const TargetRegisterClass *PtrRC = + TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. 
@@ -1931,7 +1932,8 @@ SDNode MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = - TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); + TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + *CurDAG); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -1944,7 +1946,8 @@ SDNode MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), - Op->getOperand(2).getValueType()); + Op->getOperand(2).getValueType(), + *CurDAG); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -2329,21 +2332,23 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool -CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI) { +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; // Handle the case when VT is iPTR. - return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(); + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI, unsigned ChildNo) { + SDValue N, const TargetLowering *TLI, const DataLayout &DL, + unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. - return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI, + DL); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2355,13 +2360,13 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI) { + SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; // Handle the case when VT is iPTR. 
- return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(); + return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2444,7 +2449,8 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.TLI); + Result = !::CheckType(Table, Index, N, SDISel.TLI, + SDISel.CurDAG->getDataLayout()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2454,15 +2460,16 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.TLI, - Table[Index - 1] - - SelectionDAGISel::OPC_CheckChild0Type); + Result = !::CheckChildType( + Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(), + Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + Result = !::CheckValueType(Table, Index, N, SDISel.TLI, + SDISel.CurDAG->getDataLayout()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2816,7 +2823,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) break; continue; @@ -2864,7 +2872,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI->getPointerTy(); + CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); // If the VT matches, then we will execute this case. 
if (CurNodeVT == CaseVT) @@ -2887,14 +2895,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, - Opcode-OPC_CheckChild0Type)) + CurDAG->getDataLayout(), + Opcode - OPC_CheckChild0Type)) break; continue; case OPC_CheckCondCode: if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) break; continue; case OPC_CheckInteger: @@ -3097,7 +3107,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (VT == MVT::iPTR) - VT = TLI->getPointerTy().SimpleTy; + VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; VTs.push_back(VT); } diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index bd40cac95543..34688df4765b 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -337,9 +337,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // TODO: To eliminate this problem we can remove gc.result intrinsics // completelly and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); - RegsForValue RFV(*Builder.DAG.getContext(), - Builder.DAG.getTargetLoweringInfo(), Reg, - ISP.getActualReturnType()); + RegsForValue RFV( + *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), + Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); SDValue Chain = Builder.DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e7722b392a81..fbf651277c7f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -102,7 +102,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, } if (LC == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported library call operation!"); - SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); @@ -206,14 +207,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, - getSetCCResultType(*DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); + SDValue Tmp = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl).first; - NewLHS = DAG.getNode(ISD::SETCC, dl, - getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, - NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); NewLHS = 
DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); } @@ -242,7 +245,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0)); + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout())); return Table; } @@ -265,9 +268,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // In dynamic-no-pic mode, assume that known defined values are safe. if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && - GA && - !GA->getGlobal()->isDeclaration() && - !GA->getGlobal()->isWeakForLinker()) + GA && GA->getGlobal()->isStrongDefinitionForLinker()) return true; // Otherwise assume nothing is safe. @@ -383,6 +384,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; SDLoc dl(Op); + auto &DL = TLO.DAG.getDataLayout(); // Don't know anything. KnownZero = KnownOne = APInt(BitWidth, 0); @@ -645,7 +647,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, unsigned InnerBits = InnerVT.getSizeInBits(); if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { - EVT ShTy = getShiftAmountTy(InnerVT); + EVT ShTy = getShiftAmountTy(InnerVT, DL); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) ShTy = InnerVT; SDValue NarrowShl = @@ -824,7 +826,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // for scalar types after legalization. EVT ShiftAmtTy = Op.getValueType(); if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, ShiftAmtTy); @@ -1009,8 +1011,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue Shift = In.getOperand(1); if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); - Shift = - TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType())); + Shift = TLO.DAG.getConstant(ShVal, dl, + getShiftAmountTy(Op.getValueType(), DL)); } APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, @@ -1400,7 +1402,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if ((newMask & Mask) == Mask) { - if (!getDataLayout()->isLittleEndian()) + if (!DAG.getDataLayout().isLittleEndian()) bestOffset = (origWidth/width - offset - 1) * (width/8); else bestOffset = (uint64_t)offset * (width/8); @@ -1473,7 +1475,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { - EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); + EVT NewSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), @@ -1692,11 +1695,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == N0.getValueType() || (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && - N0.getOpcode() == ISD::AND) + N0.getOpcode() == ISD::AND) { + auto &DL = DAG.getDataLayout(); if (ConstantSDNode 
*AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -1716,6 +1721,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } + } if (C1.getMinSignedBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { @@ -1727,8 +1733,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(N0.getValueType()); + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), DAG.getConstant(ShiftBits, dl, @@ -1757,8 +1765,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, NewC = NewC.lshr(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(N0.getValueType()); + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -1945,10 +1955,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); if (N0.getNode()->hasOneUse()) { assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + auto &DL = DAG.getDataLayout(); // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, dl, - getShiftAmountTy(N1.getValueType()))); + SDValue SH = DAG.getNode( + ISD::SHL, dl, N1.getValueType(), N1, + DAG.getConstant(1, dl, + getShiftAmountTy(N1.getValueType(), DL))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -1969,10 +1981,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + auto &DL = DAG.getDataLayout(); // X == (Z-X) --> X<<1 == Z - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, dl, - getShiftAmountTy(N0.getValueType()))); + SDValue SH = DAG.getNode( + ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2105,9 +2118,8 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { // Inline Assembler Implementation Methods //===----------------------------------------------------------------------===// - TargetLowering::ConstraintType -TargetLowering::getConstraintType(const std::string &Constraint) const { +TargetLowering::getConstraintType(StringRef Constraint) const { unsigned S = Constraint.size(); if (S == 1) { @@ -2140,7 +2152,7 @@ 
TargetLowering::getConstraintType(const std::string &Constraint) const { } if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { - if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}" + if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}" return C_Memory; return C_Register; } @@ -2206,8 +2218,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), C ? SDLoc(C) : SDLoc(), Op.getValueType(), Offs)); - return; } + return; } if (C) { // just C, no GV. // Simple constants are not allowed for 's'. @@ -2217,8 +2229,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // ScheduleDAGSDNodes::EmitNode, which is very generic. Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), SDLoc(C), MVT::i64)); - return; } + return; } break; } @@ -2227,7 +2239,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass *> TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); @@ -2293,7 +2305,8 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. TargetLowering::AsmOperandInfoVector -TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, +TargetLowering::ParseConstraints(const DataLayout &DL, + const TargetRegisterInfo *TRI, ImmutableCallSite CS) const { /// ConstraintOperands - Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; @@ -2329,10 +2342,11 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo)); + OpInfo.ConstraintVT = + getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(CS.getType()); + OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType()); } ++ResNo; break; @@ -2361,7 +2375,7 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy); + unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -2375,8 +2389,7 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, break; } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { - unsigned PtrSize - = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace()); + unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace()); OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); @@ -2684,7 +2697,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. 
SDValue Amt = - DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType())); + DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(), + DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); @@ -2750,17 +2764,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); Created->push_back(Q.getNode()); } + auto &DL = DAG.getDataLayout(); // Shift right algebraic if shift value is nonzero if (magics.s > 0) { - Q = DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, dl, - getShiftAmountTy(Q.getValueType()))); + Q = DAG.getNode( + ISD::SRA, dl, VT, Q, + DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient - SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, - getShiftAmountTy(Q.getValueType()))); + SDValue T = + DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, + getShiftAmountTy(Q.getValueType(), DL))); Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } @@ -2776,6 +2792,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, EVT VT = N->getValueType(0); SDLoc dl(N); + auto &DL = DAG.getDataLayout(); // Check to see if we can do this. // FIXME: We should be more aggressive here. @@ -2792,9 +2809,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, // the divided value upfront. if (magics.a != 0 && !Divisor[0]) { unsigned Shift = Divisor.countTrailingZeros(); - Q = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(Shift, dl, - getShiftAmountTy(Q.getValueType()))); + Q = DAG.getNode( + ISD::SRL, dl, VT, Q, + DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL))); Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. 
@@ -2819,21 +2836,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, if (magics.a == 0) { assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); - return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, dl, - getShiftAmountTy(Q.getValueType()))); + return DAG.getNode( + ISD::SRL, dl, VT, Q, + DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); Created->push_back(NPQ.getNode()); - NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, dl, - getShiftAmountTy(NPQ.getValueType()))); + NPQ = DAG.getNode( + ISD::SRL, dl, VT, NPQ, + DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL))); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); Created->push_back(NPQ.getNode()); - return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s - 1, dl, - getShiftAmountTy(NPQ.getValueType()))); + return DAG.getNode( + ISD::SRL, dl, VT, NPQ, + DAG.getConstant(magics.s - 1, dl, + getShiftAmountTy(NPQ.getValueType(), DL))); } } @@ -2919,8 +2937,9 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, if (!LH.getNode() && !RH.getNode() && isOperationLegalOrCustom(ISD::SRL, VT) && isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + auto &DL = DAG.getDataLayout(); unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT)); + SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL)); LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); @@ -2980,14 +2999,15 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); - SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), - DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT))); + auto &DL = DAG.getDataLayout(); + SDValue ExponentBits = DAG.getNode( + ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL))); SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); - SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), - DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT))); + SDValue Sign = DAG.getNode( + ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL))); Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); SDValue R = DAG.getNode(ISD::OR, dl, IntVT, @@ -2996,17 +3016,17 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, R = DAG.getZExtOrTrunc(R, dl, NVT); - - R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, - DAG.getNode(ISD::SHL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), - dl, getShiftAmountTy(IntVT))), - DAG.getNode(ISD::SRL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), - dl, getShiftAmountTy(IntVT))), - ISD::SETGT); + R = DAG.getSelectCC( + dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, 
getShiftAmountTy(IntVT, DL))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT, DL))), + ISD::SETGT); SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, R, Sign), diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index 0e89bad5f26f..00db94256844 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -15,9 +15,5 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL) - : DL(DL) { -} - TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { } diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 116fd5be0337..d236e1f5ab6f 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -45,7 +45,6 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { - const TargetMachine *TM; Type *doubleUnderDataTy; Type *doubleUnderJBufTy; Type *FunctionContextTy; @@ -63,7 +62,7 @@ class SjLjEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} + explicit SjLjEHPrepare() : FunctionPass(ID) {} bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; @@ -83,11 +82,11 @@ private: } // end anonymous namespace char SjLjEHPrepare::ID = 0; +INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions", + false, false) // Public Interface To the SjLjEHPrepare pass. -FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) { - return new SjLjEHPrepare(TM); -} +FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); } // doInitialization - Set up decalarations and types needed to process // exceptions. bool SjLjEHPrepare::doInitialization(Module &M) { @@ -196,9 +195,8 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); - unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + auto &DL = F.getParent()->getDataLayout(); + unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", EntryBB->begin()); diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index d88be575d56c..855058358fe4 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -49,7 +49,6 @@ namespace { /// information provided by this pass is optional and not required by the /// aformentioned intrinsic to function. class StackMapLiveness : public MachineFunctionPass { - MachineFunction *MF; const TargetRegisterInfo *TRI; LivePhysRegs LiveRegs; @@ -68,14 +67,14 @@ public: private: /// \brief Performs the actual liveness calculation for the function. - bool calculateLiveness(); + bool calculateLiveness(MachineFunction &MF); /// \brief Add the current register live set to the instruction. 
- void addLiveOutSetToMI(MachineInstr &MI); + void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI); /// \brief Create a register mask and initialize it with the registers from /// the register live set. - uint32_t *createRegisterMask() const; + uint32_t *createRegisterMask(MachineFunction &MF) const; }; } // namespace @@ -95,8 +94,7 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { // We preserve all information. AU.setPreservesAll(); AU.setPreservesCFG(); - // Default dependencie for all MachineFunction passes. - AU.addRequired<MachineFunctionAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); } /// Calculate the liveness information for the given machine function. @@ -106,7 +104,6 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() << " **********\n"); - this->MF = &MF; TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; @@ -115,25 +112,23 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { ++NumStackMapFuncSkipped; return false; } - return calculateLiveness(); + return calculateLiveness(MF); } /// Performs the actual liveness calculation for the function. -bool StackMapLiveness::calculateLiveness() { +bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { bool HasChanged = false; // For all basic blocks in the function. - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n"); + for (auto &MBB : MF) { + DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); LiveRegs.init(TRI); - LiveRegs.addLiveOuts(MBBI); + LiveRegs.addLiveOuts(&MBB); bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register // set to an instruction if we encounter a patchpoint instruction. - for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), - E = MBBI->rend(); I != E; ++I) { + for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { if (I->getOpcode() == TargetOpcode::PATCHPOINT) { - addLiveOutSetToMI(*I); + addLiveOutSetToMI(MF, *I); HasChanged = true; HasStackMap = true; ++NumStackMaps; @@ -149,21 +144,23 @@ bool StackMapLiveness::calculateLiveness() { } /// Add the current register live set to the instruction. -void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) { - uint32_t *Mask = createRegisterMask(); +void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF, + MachineInstr &MI) { + uint32_t *Mask = createRegisterMask(MF); MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask); - MI.addOperand(*MF, MO); + MI.addOperand(MF, MO); } /// Create a register mask and initialize it with the registers from the /// register live set. -uint32_t *StackMapLiveness::createRegisterMask() const { +uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const { // The mask is owned and cleaned up by the Machine Function. - uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs()); - for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); - RI != RE; ++RI) - Mask[*RI / 32] |= 1U << (*RI % 32); + uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + for (auto Reg : LiveRegs) + Mask[Reg / 32] |= 1U << (Reg % 32); + // Give the target a chance to adjust the mask. 
TRI->adjustStackMapLiveOutMask(Mask); + return Mask; } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 1e8e03f9a7df..116eef66c580 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -29,17 +29,17 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" -static cl::opt<int> StackMapVersion("stackmap-version", cl::init(1), - cl::desc("Specify the stackmap encoding version (default = 1)")); +static cl::opt<int> StackMapVersion( + "stackmap-version", cl::init(1), + cl::desc("Specify the stackmap encoding version (default = 1)")); const char *StackMaps::WSMP = "Stack Maps: "; PatchPointOpers::PatchPointOpers(const MachineInstr *MI) - : MI(MI), - HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) -{ + : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == + CallingConv::AnyReg) { #ifndef NDEBUG unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && @@ -76,30 +76,31 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { /// Go up the super-register chain until we hit a valid dwarf register number. static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { - int RegNo = TRI->getDwarfRegNum(Reg, false); - for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) - RegNo = TRI->getDwarfRegNum(*SR, false); + int RegNum = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR) + RegNum = TRI->getDwarfRegNum(*SR, false); - assert(RegNo >= 0 && "Invalid Dwarf register number."); - return (unsigned) RegNo; + assert(RegNum >= 0 && "Invalid Dwarf register number."); + return (unsigned)RegNum; } MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE, - LocationVec &Locs, LiveOutVec &LiveOuts) const { + MachineInstr::const_mop_iterator MOE, LocationVec &Locs, + LiveOutVec &LiveOuts) const { const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); if (MOI->isImm()) { switch (MOI->getImm()) { - default: llvm_unreachable("Unrecognized operand type."); + default: + llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Direct, Size, - getDwarfRegNum(Reg, TRI), Imm)); + Locs.emplace_back(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm); break; } case StackMaps::IndirectMemRefOp: { @@ -107,15 +108,15 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(Size > 0 && "Need a valid size for indirect memory locations."); unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Indirect, Size, - getDwarfRegNum(Reg, TRI), Imm)); + Locs.emplace_back(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm); break; } case StackMaps::ConstantOp: { ++MOI; assert(MOI->isImm() && "Expected constant operand."); int64_t Imm = MOI->getImm(); - Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm)); + 
Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, Imm); break; } } @@ -137,14 +138,13 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(!MOI->getSubReg() && "Physical subreg still around."); unsigned Offset = 0; - unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI); - unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg()); + unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg()); if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); - Locs.push_back( - Location(Location::Register, RC->getSize(), RegNo, Offset)); + Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset); return ++MOI; } @@ -165,19 +165,19 @@ void StackMaps::print(raw_ostream &OS) { OS << WSMP << "callsite " << CSI.ID << "\n"; OS << WSMP << " has " << CSLocs.size() << " locations\n"; - unsigned OperIdx = 0; + unsigned Idx = 0; for (const auto &Loc : CSLocs) { - OS << WSMP << " Loc " << OperIdx << ": "; - switch (Loc.LocType) { + OS << WSMP << "\t\tLoc " << Idx << ": "; + switch (Loc.Type) { case Location::Unprocessed: OS << "<Unprocessed operand>"; break; case Location::Register: OS << "Register "; - if (TRI) - OS << TRI->getName(Loc.Reg); - else - OS << Loc.Reg; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; break; case Location::Direct: OS << "Direct "; @@ -203,23 +203,23 @@ void StackMaps::print(raw_ostream &OS) { OS << "Constant Index " << Loc.Offset; break; } - OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size + OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; - OperIdx++; + Idx++; } - OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n"; + OS << WSMP << "\thas " << LiveOuts.size() << " live-out registers\n"; - OperIdx = 0; + Idx = 0; for (const auto &LO : LiveOuts) { - OS << WSMP << " LO " << OperIdx << ": "; + OS << WSMP << "\t\tLO " << Idx << ": "; if (TRI) OS << TRI->getName(LO.Reg); else OS << LO.Reg; - OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte " + OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte " << LO.Size << "]\n"; - OperIdx++; + Idx++; } } } @@ -227,9 +227,9 @@ void StackMaps::print(raw_ostream &OS) { /// Create a live-out register record for the given register Reg. StackMaps::LiveOutReg StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { - unsigned RegNo = getDwarfRegNum(Reg, TRI); + unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI); unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - return LiveOutReg(Reg, RegNo, Size); + return LiveOutReg(Reg, DwarfRegNum, Size); } /// Parse the register live-out mask and return a vector of live-out registers @@ -248,11 +248,16 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { // We don't need to keep track of a register if its super-register is already // in the list. Merge entries that refer to the same dwarf register and use // the maximum size that needs to be spilled. 
- std::sort(LiveOuts.begin(), LiveOuts.end()); - for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end(); - I != E; ++I) { - for (LiveOutVec::iterator II = std::next(I); II != E; ++II) { - if (I->RegNo != II->RegNo) { + + std::sort(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LHS, const LiveOutReg &RHS) { + // Only sort by the dwarf register number. + return LHS.DwarfRegNum < RHS.DwarfRegNum; + }); + + for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { + for (auto II = std::next(I); II != E; ++II) { + if (I->DwarfRegNum != II->DwarfRegNum) { // Skip all the now invalid entries. I = --II; break; @@ -260,11 +265,15 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { I->Size = std::max(I->Size, II->Size); if (TRI->isSuperRegister(I->Reg, II->Reg)) I->Reg = II->Reg; - II->MarkInvalid(); + II->Reg = 0; // mark for deletion. } } - LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(), - LiveOutReg::IsInvalid), LiveOuts.end()); + + LiveOuts.erase( + std::remove_if(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LO) { return LO.Reg == 0; }), + LiveOuts.end()); + return LiveOuts; } @@ -282,8 +291,8 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, if (recordResult) { assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value."); - parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), - Locations, LiveOuts); + parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), Locations, + LiveOuts); } // Parse operands. @@ -292,33 +301,31 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, } // Move large constants into the constant pool. - for (LocationVec::iterator I = Locations.begin(), E = Locations.end(); - I != E; ++I) { + for (auto &Loc : Locations) { // Constants are encoded as sign-extended integers. // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. - if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) { - I->LocType = Location::ConstantIndex; + if (Loc.Type == Location::Constant && !isInt<32>(Loc.Offset)) { + Loc.Type = Location::ConstantIndex; // ConstPool is intentionally a MapVector of 'uint64_t's (as // opposed to 'int64_t's). We should never be in a situation // where we have to insert either the tombstone or the empty // keys into a map, and for a DenseMap<uint64_t, T> these are // (uint64_t)0 and (uint64_t)-1. They can be and are // represented using 32 bit integers. - - assert((uint64_t)I->Offset != DenseMapInfo<uint64_t>::getEmptyKey() && - (uint64_t)I->Offset != DenseMapInfo<uint64_t>::getTombstoneKey() && + assert((uint64_t)Loc.Offset != DenseMapInfo<uint64_t>::getEmptyKey() && + (uint64_t)Loc.Offset != + DenseMapInfo<uint64_t>::getTombstoneKey() && "empty and tombstone keys should fit in 32 bits!"); - auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset)); - I->Offset = Result.first - ConstPool.begin(); + auto Result = ConstPool.insert(std::make_pair(Loc.Offset, Loc.Offset)); + Loc.Offset = Result.first - ConstPool.begin(); } } // Create an expression to calculate the offset of the callsite from function // entry. 
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(MILabel, OutContext), - MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), - OutContext); + MCSymbolRefExpr::create(MILabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), std::move(LiveOuts)); @@ -326,10 +333,10 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // Record the stack size of the current function. const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); - const bool DynamicFrameSize = MFI->hasVarSizedObjects() || - RegInfo->needsStackRealignment(*(AP.MF)); + bool HasDynamicFrameSize = + MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); FnStackSize[AP.CurrentFnSym] = - DynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); + HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); } void StackMaps::recordStackMap(const MachineInstr &MI) { @@ -346,25 +353,23 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { PatchPointOpers opers(&MI); int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - MachineInstr::const_mop_iterator MOI = - std::next(MI.operands_begin(), opers.getStackMapStartIdx()); + auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG // verify anyregcc - LocationVec &Locations = CSInfos.back().Locations; + auto &Locations = CSInfos.back().Locations; if (opers.isAnyReg()) { unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); - for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i) - assert(Locations[i].LocType == Location::Register && + for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i) + assert(Locations[i].Type == Location::Register && "anyreg arg must be in reg."); } #endif } void StackMaps::recordStatepoint(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::STATEPOINT && - "expected statepoint"); + assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); StatepointOpers opers(&MI); // Record all the deopt and gc operands (they're contiguous and run from the @@ -387,8 +392,8 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) { void StackMaps::emitStackmapHeader(MCStreamer &OS) { // Header. OS.EmitIntValue(StackMapVersion, 1); // Version. - OS.EmitIntValue(0, 1); // Reserved. - OS.EmitIntValue(0, 2); // Reserved. + OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. // Num functions. DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); @@ -412,7 +417,7 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { DEBUG(dbgs() << WSMP << "functions:\n"); for (auto const &FR : FnStackSize) { DEBUG(dbgs() << WSMP << "function addr: " << FR.first - << " frame size: " << FR.second); + << " frame size: " << FR.second); OS.EmitSymbolValue(FR.first, 8); OS.EmitIntValue(FR.second, 8); } @@ -424,7 +429,7 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { // Constant pool entries. 
DEBUG(dbgs() << WSMP << "constants:\n"); - for (auto ConstEntry : ConstPool) { + for (const auto &ConstEntry : ConstPool) { DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); OS.EmitIntValue(ConstEntry.second, 8); } @@ -489,7 +494,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { OS.EmitIntValue(CSLocs.size(), 2); for (const auto &Loc : CSLocs) { - OS.EmitIntValue(Loc.LocType, 1); + OS.EmitIntValue(Loc.Type, 1); OS.EmitIntValue(Loc.Size, 1); OS.EmitIntValue(Loc.Reg, 2); OS.EmitIntValue(Loc.Offset, 4); @@ -500,7 +505,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { OS.EmitIntValue(LiveOuts.size(), 2); for (const auto &LO : LiveOuts) { - OS.EmitIntValue(LO.RegNo, 2); + OS.EmitIntValue(LO.DwarfRegNum, 2); OS.EmitIntValue(0, 1); OS.EmitIntValue(LO.Size, 1); } @@ -511,7 +516,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { /// Serialize the stackmap data. void StackMaps::serializeToStackMapSection() { - (void) WSMP; + (void)WSMP; // Bail out if there's no stack map data. assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) && "Expected empty constant pool too!"); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 0824d6f91db0..bcea37a3aafa 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -122,7 +122,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. - if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) { + if (SSPBufferSize <= M->getDataLayout().getTypeAllocSize(AT)) { IsLarge = true; return true; } diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 56383247eadb..f3cccd82a5c5 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -11,9 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -54,3 +57,30 @@ bool TargetFrameLowering::needsFrameIndexResolution( const MachineFunction &MF) const { return MF.getFrameInfo()->hasStackObjects(); } + +void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + // Get the callee saved register list... + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + + // Early exit if there are no callee saved registers. + if (!CSRegs || CSRegs[0] == 0) + return; + + SavedRegs.resize(TRI.getNumRegs()); + + // In Naked functions we aren't going to save any registers. + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + return; + + // Functions which call __builtin_unwind_init get all their registers saved. 
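determineCalleeSaves is introduced here as a shared base implementation: it sizes the BitVector to the number of target registers and, in the loop below, sets a bit for every callee-saved register that is modified (or for all of them when the function calls __builtin_unwind_init). A hedged sketch of how a target would typically layer on the new hook; determineCalleeSavesForTarget and ExtraReg are illustrative names, not code from the patch:

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"

using namespace llvm;

// Run the shared base logic, then pin any extra register the target wants
// saved (for example a frame pointer when one is required).
static void determineCalleeSavesForTarget(const TargetFrameLowering &TFL,
                                          MachineFunction &MF,
                                          BitVector &SavedRegs,
                                          RegScavenger *RS, unsigned ExtraReg) {
  TFL.determineCalleeSaves(MF, SavedRegs, RS); // shared base implementation
  if (TFL.hasFP(MF))
    SavedRegs.set(ExtraReg); // ExtraReg is a placeholder register number
}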
+ bool CallsUnwindInit = MF.getMMI().callsUnwindInit(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (CallsUnwindInit || MRI.isPhysRegModified(Reg)) + SavedRegs.set(Reg); + } +} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 78492a6e8818..ecfd65931574 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -750,7 +750,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - IsLittleEndian = getDataLayout()->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -879,28 +878,17 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); } -MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { - return MVT::getIntegerVT(getPointerSizeInBits(AS)); +MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, + EVT) const { + return MVT::getIntegerVT(8 * DL.getPointerSize(0)); } -unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { - return getDataLayout()->getPointerSizeInBits(AS); -} - -unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { - assert(Ty->isPointerTy()); - return getPointerSizeInBits(Ty->getPointerAddressSpace()); -} - -MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0)); -} - -EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, + const DataLayout &DL) const { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return getScalarShiftAmountTy(LHSTy); + return getScalarShiftAmountTy(DL, LHSTy); } /// canOpTrap - Returns true if the operation can trap for the value type. @@ -1398,9 +1386,10 @@ void TargetLoweringBase::computeRegisterProperties( } } -EVT TargetLoweringBase::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); - return getPointerTy(0).SimpleTy; + return getPointerTy(DL).SimpleTy; } MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { @@ -1485,11 +1474,11 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, +void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr, SmallVectorImpl<ISD::OutputArg> &Outs, - const TargetLowering &TLI) { + const TargetLowering &TLI, const DataLayout &DL) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); + ComputeValueVTs(TLI, DL, ReturnType, ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -1534,8 +1523,9 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. 
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return getDataLayout()->getABITypeAlignment(Ty); +unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { + return DL.getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// @@ -1614,9 +1604,10 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { } std::pair<unsigned, MVT> -TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const { +TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const { LLVMContext &C = Ty->getContext(); - EVT MTy = getValueType(Ty); + EVT MTy = getValueType(DL, Ty); unsigned Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that @@ -1642,8 +1633,8 @@ TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e84bea63995e..1e30821dc741 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1215,11 +1215,11 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // addl %esi, %edi // movl %edi, %eax // ret - bool commuted = false; + bool Commuted = false; // If it's profitable to commute, try to do so. if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { - commuted = true; + Commuted = true; ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1232,7 +1232,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { + if (!Commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1250,7 +1250,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } // Return if it is commuted but 3 addr conversion is failed. - if (commuted) + if (Commuted) return false; // If there is one more use of regB later in the same MBB, consider diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 2912bdd63426..02341b4d66b8 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -163,7 +163,6 @@ class VirtRegRewriter : public MachineFunctionPass { SlotIndexes *Indexes; LiveIntervals *LIS; VirtRegMap *VRM; - SparseSet<unsigned> PhysRegs; void rewrite(); void addMBBLiveIns(); @@ -319,54 +318,15 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; - SmallPtrSet<const MachineInstr *, 4> NoReturnInsts; - - // Here we have a SparseSet to hold which PhysRegs are actually encountered - // in the MF we are about to iterate over so that later when we call - // setPhysRegUsed, we are only doing it for physRegs that were actually found - // in the program and not for all of the possible physRegs for the given - // target architecture. 
If the target has a lot of physRegs, then for a small - // program there will be a significant compile time reduction here. - PhysRegs.clear(); - PhysRegs.setUniverse(TRI->getNumRegs()); - - // The function with uwtable should guarantee that the stack unwinder - // can unwind the stack to the previous frame. Thus, we can't apply the - // noreturn optimization if the caller function has uwtable attribute. - bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable); for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - bool IsExitBB = MBBI->succ_empty(); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = MII; ++MII; - // Check if this instruction is a call to a noreturn function. If this - // is a call to noreturn function and we don't need the stack unwinding - // functionality (i.e. this function does not have uwtable attribute and - // the callee function has the nounwind attribute), then we can ignore - // the definitions set by this instruction. - if (!HasUWTable && IsExitBB && MI->isCall()) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (!MO.isGlobal()) - continue; - const Function *Func = dyn_cast<Function>(MO.getGlobal()); - if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) || - // We need to keep correct unwind information - // even if the function will not return, since the - // runtime may need it. - !Func->hasFnAttribute(Attribute::NoUnwind)) - continue; - NoReturnInsts.insert(MI); - break; - } - } - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -375,15 +335,6 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - // If we encounter a VirtReg or PhysReg then get at the PhysReg and add - // it to the physreg bitset. Later we use only the PhysRegs that were - // actually encountered in the MF to populate the MRI's used physregs. - if (MO.isReg() && MO.getReg()) - PhysRegs.insert( - TargetRegisterInfo::isVirtualRegister(MO.getReg()) ? - VRM->getPhys(MO.getReg()) : - MO.getReg()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); @@ -470,29 +421,5 @@ void VirtRegRewriter::rewrite() { } } } - - // Tell MRI about physical registers in use. - if (NoReturnInsts.empty()) { - for (SparseSet<unsigned>::iterator - RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI) - if (!MRI->reg_nodbg_empty(*RegI)) - MRI->setPhysRegUsed(*RegI); - } else { - for (SparseSet<unsigned>::iterator - I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) { - unsigned Reg = *I; - if (MRI->reg_nodbg_empty(Reg)) - continue; - // Check if this register has a use that will impact the rest of the - // code. Uses in debug and noreturn instructions do not impact the - // generated code. - for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) { - if (!NoReturnInsts.count(&It)) { - MRI->setPhysRegUsed(Reg); - break; - } - } - } - } } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index dbc0d91a01e2..0d26ed333ca7 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -155,7 +155,7 @@ private: // outlined but before the outlined code is pruned from the parent function. 
DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; - // Map from outlined handler to call to llvm.frameaddress(1). Only used for + // Map from outlined handler to call to parent local address. Only used for // 32-bit EH. DenseMap<Function *, Value *> HandlerToParentFP; @@ -533,9 +533,9 @@ void WinEHPrepare::findSEHEHReturnPoints( BasicBlock *NextBB; Constant *Selector; if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { - // Split the edge if there is a phi node. Returning from EH to a phi node - // is just as impossible as having a phi after an indirectbr. - if (isa<PHINode>(CatchHandler->begin())) { + // Split the edge if there are multiple predecessors. This creates a place + // where we can insert EH recovery code. + if (!CatchHandler->getSinglePredecessor()) { DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() << " to " << CatchHandler->getName() << '\n'); BBI = CatchHandler = SplitCriticalEdge( @@ -616,6 +616,26 @@ void WinEHPrepare::demoteValuesLiveAcrossHandlers( // identifyEHBlocks() should have been called before this function. assert(!NormalBlocks.empty()); + // Try to avoid demoting EH pointer and selector values. They get in the way + // of our pattern matching. + SmallPtrSet<Instruction *, 10> EHVals; + for (BasicBlock &BB : F) { + LandingPadInst *LP = BB.getLandingPadInst(); + if (!LP) + continue; + EHVals.insert(LP); + for (User *U : LP->users()) { + auto *EI = dyn_cast<ExtractValueInst>(U); + if (!EI) + continue; + EHVals.insert(EI); + for (User *U2 : EI->users()) { + if (auto *PN = dyn_cast<PHINode>(U2)) + EHVals.insert(PN); + } + } + } + SetVector<Argument *> ArgsToDemote; SetVector<Instruction *> InstrsToDemote; for (BasicBlock &BB : F) { @@ -641,7 +661,11 @@ void WinEHPrepare::demoteValuesLiveAcrossHandlers( continue; } + // Don't demote EH values. auto *OpI = cast<Instruction>(Op); + if (EHVals.count(OpI)) + continue; + BasicBlock *OpBB = OpI->getParent(); // If a value is produced and consumed in the same BB, we don't need to // demote it. @@ -822,7 +846,8 @@ bool WinEHPrepare::prepareExceptionHandlers( LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); // Rewrite uses of the exception pointer to loads of an alloca. - for (Instruction *E : SEHCodeUses) { + while (!SEHCodeUses.empty()) { + Instruction *E = SEHCodeUses.pop_back_val(); SmallVector<Use *, 4> Uses; for (Use &U : E->uses()) Uses.push_back(&U); @@ -830,13 +855,10 @@ bool WinEHPrepare::prepareExceptionHandlers( auto *I = cast<Instruction>(U->getUser()); if (isa<ResumeInst>(I)) continue; - LoadInst *LI; if (auto *Phi = dyn_cast<PHINode>(I)) - LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, - Phi->getIncomingBlock(*U)); + SEHCodeUses.push_back(Phi); else - LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I); - U->set(LI); + U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I)); } E->replaceAllUsesWith(UndefValue::get(E->getType())); E->eraseFromParent(); @@ -953,16 +975,16 @@ bool WinEHPrepare::prepareExceptionHandlers( Builder.SetInsertPoint(Entry->getFirstInsertionPt()); Function *FrameEscapeFn = - Intrinsic::getDeclaration(M, Intrinsic::frameescape); + Intrinsic::getDeclaration(M, Intrinsic::localescape); Function *RecoverFrameFn = - Intrinsic::getDeclaration(M, Intrinsic::framerecover); + Intrinsic::getDeclaration(M, Intrinsic::localrecover); SmallVector<Value *, 8> AllocasToEscape; - // Scan the entry block for an existing call to llvm.frameescape. We need to + // Scan the entry block for an existing call to llvm.localescape. 
We need to // keep escaping those objects. for (Instruction &I : F.front()) { auto *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + if (II && II->getIntrinsicID() == Intrinsic::localescape) { auto Args = II->arg_operands(); AllocasToEscape.append(Args.begin(), Args.end()); II->eraseFromParent(); @@ -971,7 +993,7 @@ bool WinEHPrepare::prepareExceptionHandlers( } // Finally, replace all of the temporary allocas for frame variables used in - // the outlined handlers with calls to llvm.framerecover. + // the outlined handlers with calls to llvm.localrecover. for (auto &VarInfoEntry : FrameVarInfo) { Value *ParentVal = VarInfoEntry.first; TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second; @@ -992,7 +1014,7 @@ bool WinEHPrepare::prepareExceptionHandlers( llvm::Value *FP = HandlerToParentFP[HandlerFn]; assert(FP); - // FIXME: Sink this framerecover into the blocks where it is used. + // FIXME: Sink this localrecover into the blocks where it is used. Builder.SetInsertPoint(TempAlloca); Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); Value *RecoverArgs[] = { @@ -1014,7 +1036,7 @@ bool WinEHPrepare::prepareExceptionHandlers( } } // End for each FrameVarInfo entry. - // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry + // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry // block. Builder.SetInsertPoint(&F.getEntryBlock().back()); Builder.CreateCall(FrameEscapeFn, AllocasToEscape); @@ -1595,9 +1617,8 @@ void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, VMap[Extract] = SelectorValue; } -static bool isFrameAddressCall(const Value *V) { - return match(const_cast<Value *>(V), - m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0))); +static bool isLocalAddressCall(const Value *V) { + return match(const_cast<Value *>(V), m_Intrinsic<Intrinsic::localaddress>()); } CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( @@ -1639,9 +1660,9 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) return handleTypeIdFor(VMap, Inst, NewBB); - // When outlining llvm.frameaddress(i32 0), remap that to the second argument, + // When outlining llvm.localaddress(), remap that to the second argument, // which is the FP of the parent. - if (isFrameAddressCall(Inst)) { + if (isLocalAddressCall(Inst)) { VMap[Inst] = ParentFP; return CloningDirector::SkipInstruction; } @@ -1961,7 +1982,7 @@ Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { // If we're asked to materialize a static alloca, we temporarily create an // alloca in the outlined function and add this to the FrameVarInfo map. When // all the outlining is complete, we'll replace these temporary allocas with - // calls to llvm.framerecover. + // calls to llvm.localrecover. if (auto *AV = dyn_cast<AllocaInst>(V)) { assert(AV->isStaticAlloca() && "cannot materialize un-demoted dynamic alloca"); @@ -1991,7 +2012,7 @@ void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { // of a catch parameter, add a sentinel to the multimap to indicate that it's // used from another handler. This will prevent us from trying to sink the // alloca into the handler and ensure that the catch parameter is present in - // the call to llvm.frameescape. + // the call to llvm.localescape. 
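The hunks above and below track the intrinsic rename from llvm.frameescape / llvm.framerecover / llvm.frameaddress(i32 0) to llvm.localescape / llvm.localrecover / llvm.localaddress. A hedged sketch of the recover side of that pairing as an outlined handler would emit it; recoverParentAlloca, Parent, and ParentFP are illustrative names, not code from the patch:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// In an outlined handler, recover escaped alloca #Idx of the parent function
// through the parent's frame pointer (obtained there via llvm.localaddress).
static Value *recoverParentAlloca(Module &M, Function &Parent, Value *ParentFP,
                                  unsigned Idx, IRBuilder<> &Builder) {
  Function *RecoverFn = Intrinsic::getDeclaration(&M, Intrinsic::localrecover);
  Value *Args[] = {ConstantExpr::getBitCast(&Parent, Builder.getInt8PtrTy()),
                   ParentFP, Builder.getInt32(Idx)};
  return Builder.CreateCall(RecoverFn, Args);
}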
FrameVarInfo[V].push_back(getCatchObjectSentinel()); } @@ -2233,16 +2254,16 @@ static void createCleanupHandler(LandingPadActions &Actions, static CallSite matchOutlinedFinallyCall(BasicBlock *BB, Instruction *MaybeCall) { // Look for finally blocks that Clang has already outlined for us. - // %fp = call i8* @llvm.frameaddress(i32 0) + // %fp = call i8* @llvm.localaddress() // call void @"fin$parent"(iN 1, i8* %fp) - if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) + if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) MaybeCall = MaybeCall->getNextNode(); CallSite FinallyCall(MaybeCall); if (!FinallyCall || FinallyCall.arg_size() != 2) return CallSite(); if (!match(FinallyCall.getArgument(0), m_SpecificInt(1))) return CallSite(); - if (!isFrameAddressCall(FinallyCall.getArgument(1))) + if (!isLocalAddressCall(FinallyCall.getArgument(1))) return CallSite(); return FinallyCall; } diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index c25ddad33b76..96bcf15e0af0 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -677,7 +677,13 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // First calculate the address of the symbol or section as it appears // in the objct file if (Sym != Obj.symbol_end()) { - Sym->getAddress(SymAddr); + ErrorOr<uint64_t> SymAddrOrErr = Sym->getAddress(); + if (std::error_code EC = SymAddrOrErr.getError()) { + errs() << "error: failed to compute symbol address: " + << EC.message() << '\n'; + continue; + } + SymAddr = *SymAddrOrErr; // Also remember what section this symbol is in for later Sym->getSection(RSec); } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) { diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt index 348308897dc4..331d2141b0e2 100644 --- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt @@ -3,4 +3,6 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMIntelJITEvents IntelJITEventListener.cpp jitprofiling.c - ) + + LINK_LIBS pthread ${CMAKE_DL_LIBS} +) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 907144007fdd..a131763193c0 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -113,63 +113,59 @@ void IntelJITEventListener::NotifyObjectEmitted( std::vector<LineNumberInfo> LineInfo; std::string SourceFileName; - if (Sym.getType() == SymbolRef::ST_Function) { - ErrorOr<StringRef> Name = Sym.getName(); - if (!Name) - continue; - - uint64_t Addr; - if (Sym.getAddress(Addr)) - continue; - uint64_t Size = P.second; - - // Record this address in a local vector - Functions.push_back((void*)Addr); - - // Build the function loaded notification message - iJIT_Method_Load FunctionMessage = - FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size); - if (Context) { - DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); - DILineInfoTable::iterator Begin = Lines.begin(); - DILineInfoTable::iterator End = Lines.end(); - for (DILineInfoTable::iterator It = Begin; It != End; ++It) { - LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr, - It->first, - It->second)); - } - if (LineInfo.size() == 0) { - FunctionMessage.source_file_name = 0; - FunctionMessage.line_number_size = 0; - FunctionMessage.line_number_table = 0; - } else { - // Source line information for the address range is provided as - // a code offset for the start of the corresponding sub-range and - // a source line. JIT API treats offsets in LineNumberInfo structures - // as the end of the corresponding code region. The start of the code - // is taken from the previous element. Need to shift the elements. 
- - LineNumberInfo last = LineInfo.back(); - last.Offset = FunctionMessage.method_size; - LineInfo.push_back(last); - for (size_t i = LineInfo.size() - 2; i > 0; --i) - LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber; - - SourceFileName = Lines.front().second.FileName; - FunctionMessage.source_file_name = const_cast<char *>(SourceFileName.c_str()); - FunctionMessage.line_number_size = LineInfo.size(); - FunctionMessage.line_number_table = &*LineInfo.begin(); - } - } else { - FunctionMessage.source_file_name = 0; - FunctionMessage.line_number_size = 0; - FunctionMessage.line_number_table = 0; - } - - Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, - &FunctionMessage); - MethodIDs[(void*)Addr] = FunctionMessage.method_id; + if (Sym.getType() != SymbolRef::ST_Function) + continue; + + ErrorOr<StringRef> Name = Sym.getName(); + if (!Name) + continue; + + ErrorOr<uint64_t> AddrOrErr = Sym.getAddress(); + if (AddrOrErr.getError()) + continue; + uint64_t Addr = *AddrOrErr; + uint64_t Size = P.second; + + // Record this address in a local vector + Functions.push_back((void*)Addr); + + // Build the function loaded notification message + iJIT_Method_Load FunctionMessage = + FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size); + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) { + LineInfo.push_back( + DILineInfoToIntelJITFormat((uintptr_t)Addr, It->first, It->second)); } + if (LineInfo.size() == 0) { + FunctionMessage.source_file_name = 0; + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } else { + // Source line information for the address range is provided as + // a code offset for the start of the corresponding sub-range and + // a source line. JIT API treats offsets in LineNumberInfo structures + // as the end of the corresponding code region. The start of the code + // is taken from the previous element. Need to shift the elements. 
+ + LineNumberInfo last = LineInfo.back(); + last.Offset = FunctionMessage.method_size; + LineInfo.push_back(last); + for (size_t i = LineInfo.size() - 2; i > 0; --i) + LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber; + + SourceFileName = Lines.front().second.FileName; + FunctionMessage.source_file_name = + const_cast<char *>(SourceFileName.c_str()); + FunctionMessage.line_number_size = LineInfo.size(); + FunctionMessage.line_number_table = &*LineInfo.begin(); + } + + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); + MethodIDs[(void*)Addr] = FunctionMessage.method_id; } // To support object unload notification, we need to keep a list of diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt index 1247cbd94930..afea3ecccda4 100644 --- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt +++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt @@ -21,4 +21,4 @@ type = OptionalLibrary name = IntelJITEvents parent = ExecutionEngine -required_libraries = Core DebugInfoDWARF Support +required_libraries = Core DebugInfoDWARF Support Object ExecutionEngine diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt index e30516eb3b01..7d5550046a56 100644 --- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt @@ -21,3 +21,4 @@ type = OptionalLibrary name = OProfileJIT parent = ExecutionEngine +required_libraries = Support Object ExecutionEngine diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index b72033805269..324d07118704 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -88,24 +88,27 @@ void OProfileJITEventListener::NotifyObjectEmitted( // Use symbol info to iterate functions in the object. 
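Both JIT event listeners are being converted from the old bool-returning SymbolRef accessors to the ErrorOr<T>-returning ones used in the loop that follows: check getError(), then dereference to get the value. A hedged sketch of that consumption pattern; dumpFunctionSymbol is an illustrative helper, not part of the patch:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// Print "name @ address" for a function symbol, skipping it on any error.
static void dumpFunctionSymbol(const SymbolRef &Sym, raw_ostream &OS) {
  if (Sym.getType() != SymbolRef::ST_Function)
    return;
  ErrorOr<StringRef> NameOrErr = Sym.getName();
  if (std::error_code EC = NameOrErr.getError()) {
    OS << "error: " << EC.message() << '\n';
    return;
  }
  ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
  if (AddrOrErr.getError())
    return;
  OS << *NameOrErr << " @ " << *AddrOrErr << '\n';
}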
for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { SymbolRef Sym = P.first; - if (Sym.getType() == SymbolRef::ST_Function) { - StringRef Name; - uint64_t Addr; - if (Sym.getName(Name)) - continue; - if (Sym.getAddress(Addr)) - continue; - uint64_t Size = P.second; - - if (Wrapper->op_write_native_code(Name.data(), Addr, (void*)Addr, Size) - == -1) { - DEBUG(dbgs() << "Failed to tell OProfile about native function " - << Name << " at [" - << (void*)Addr << "-" << ((char*)Addr + Size) << "]\n"); - continue; - } - // TODO: support line number info (similar to IntelJITEventListener.cpp) + if (Sym.getType() != SymbolRef::ST_Function) + continue; + + ErrorOr<StringRef> NameOrErr = Sym.getName(); + if (NameOrErr.getError()) + continue; + StringRef Name = *NameOrErr; + ErrorOr<uint64_t> AddrOrErr = Sym.getAddress(); + if (AddrOrErr.getError()) + continue; + uint64_t Addr = *AddrOrErr; + uint64_t Size = P.second; + + if (Wrapper->op_write_native_code(Name.data(), Addr, (void *)Addr, Size) == + -1) { + DEBUG(dbgs() << "Failed to tell OProfile about native function " << Name + << " at [" << (void *)Addr << "-" << ((char *)Addr + Size) + << "]\n"); + continue; } + // TODO: support line number info (similar to IntelJITEventListener.cpp) } DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner); @@ -126,8 +129,10 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { E = DebugObj.symbol_end(); I != E; ++I) { if (I->getType() == SymbolRef::ST_Function) { - uint64_t Addr; - if (I->getAddress(Addr)) continue; + ErrorOr<uint64_t> AddrOrErr = I->getAddress(); + if (AddrOrErr.getError()) + continue; + uint64_t Addr = *AddrOrErr; if (Wrapper->op_unload_native_code(Addr) == -1) { DEBUG(dbgs() diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index fa501824e04a..93287a3a4e71 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -113,28 +113,12 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, llvm_unreachable("Attempting to remap address of unknown section!"); } -static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { - uint64_t Address; - if (std::error_code EC = Sym.getAddress(Address)) +static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec, + uint64_t &Result) { + ErrorOr<uint64_t> AddressOrErr = Sym.getAddress(); + if (std::error_code EC = AddressOrErr.getError()) return EC; - - if (Address == UnknownAddress) { - Result = UnknownAddress; - return std::error_code(); - } - - const ObjectFile *Obj = Sym.getObject(); - section_iterator SecI(Obj->section_begin()); - if (std::error_code EC = Sym.getSection(SecI)) - return EC; - - if (SecI == Obj->section_end()) { - Result = UnknownAddress; - return std::error_code(); - } - - uint64_t SectionAddress = SecI->getAddress(); - Result = Address - SectionAddress; + Result = *AddressOrErr - Sec.getAddress(); return std::error_code(); } @@ -184,12 +168,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { ErrorOr<StringRef> NameOrErr = I->getName(); Check(NameOrErr.getError()); StringRef Name = *NameOrErr; - uint64_t SectOffset; - Check(getOffset(*I, SectOffset)); section_iterator SI = Obj.section_end(); Check(I->getSection(SI)); if (SI == Obj.section_end()) continue; + uint64_t SectOffset; + Check(getOffset(*I, *SI, SectOffset)); StringRef SectionData; Check(SI->getContents(SectionData)); bool IsCode = SI->isText(); @@ 
-814,12 +798,16 @@ void RuntimeDyldImpl::resolveExternalSymbols() { report_fatal_error("Program used external function '" + Name + "' which could not be resolved!"); - DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t" - << format("0x%lx", Addr) << "\n"); - // This list may have been updated when we called getSymbolAddress, so - // don't change this code to get the list earlier. - RelocationList &Relocs = i->second; - resolveRelocationList(Relocs, Addr); + // If Resolver returned UINT64_MAX, the client wants to handle this symbol + // manually and we shouldn't resolve its relocations. + if (Addr != UINT64_MAX) { + DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t" + << format("0x%lx", Addr) << "\n"); + // This list may have been updated when we called getSymbolAddress, so + // don't change this code to get the list earlier. + RelocationList &Relocs = i->second; + resolveRelocationList(Relocs, Addr); + } } ExternalSymbolRelocations.erase(i); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 957571b092da..ae199b720223 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -727,7 +727,9 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, } bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const { - return getRTDyld().getSymbolLocalAddress(Symbol) != nullptr; + if (getRTDyld().getSymbolLocalAddress(Symbol)) + return true; + return !!getRTDyld().Resolver.findSymbol(Symbol); } uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f5069c005857..3787950b3b08 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -511,11 +511,54 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, Insn |= Value & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; - case ELF::R_MIPS_PC32: + case ELF::R_MIPS_PC32: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC16: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xffff0000; + Insn |= ((Value - FinalAddress) >> 2) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC19_S2: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xfff80000; + Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC21_S2: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xffe00000; + Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC26_S2: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xfc000000; + Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PCHI16: { uint32_t FinalAddress = (Section.LoadAddress + Offset); - writeBytesUnaligned(Value + Addend - FinalAddress, (uint8_t *)TargetPtr, 4); + Insn &= 0xffff0000; + Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); break; } + case ELF::R_MIPS_PCLO16: { + uint32_t FinalAddress = 
(Section.LoadAddress + Offset); + Insn &= 0xffff0000; + Insn |= (Value - FinalAddress) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + } } void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { @@ -1263,12 +1306,24 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( Section.StubOffset += getMaxStubSize(); } } else { - if (RelType == ELF::R_MIPS_HI16) + // FIXME: Calculate correct addends for R_MIPS_HI16, R_MIPS_LO16, + // R_MIPS_PCHI16 and R_MIPS_PCLO16 relocations. + if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) Value.Addend += (Opcode & 0x0000ffff) << 16; else if (RelType == ELF::R_MIPS_LO16) Value.Addend += (Opcode & 0x0000ffff); else if (RelType == ELF::R_MIPS_32) Value.Addend += Opcode; + else if (RelType == ELF::R_MIPS_PCLO16) + Value.Addend += SignExtend32<16>((Opcode & 0x0000ffff)); + else if (RelType == ELF::R_MIPS_PC16) + Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2); + else if (RelType == ELF::R_MIPS_PC19_S2) + Value.Addend += SignExtend32<21>((Opcode & 0x0007ffff) << 2); + else if (RelType == ELF::R_MIPS_PC21_S2) + Value.Addend += SignExtend32<23>((Opcode & 0x001fffff) << 2); + else if (RelType == ELF::R_MIPS_PC26_S2) + Value.Addend += SignExtend32<28>((Opcode & 0x03ffffff) << 2); processSimpleRelocation(SectionID, Offset, RelType, Value); } } else if (IsMipsN64ABI) { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 74b13d60a984..c0741141757c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -89,19 +89,11 @@ RelocationValueRef RuntimeDyldMachO::getRelocationValueRef( } void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value, - const ObjectFile &BaseTObj, const relocation_iterator &RI, unsigned OffsetToNextPC) { - const MachOObjectFile &Obj = - static_cast<const MachOObjectFile &>(BaseTObj); - MachO::any_relocation_info RelInfo = - Obj.getRelocation(RI->getRawDataRefImpl()); - - bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo); - if (IsPCRel) { - ErrorOr<uint64_t> RelocAddr = RI->getAddress(); - Value.Offset += *RelocAddr + OffsetToNextPC; - } + auto &O = *cast<MachOObjectFile>(RI->getObject()); + section_iterator SecI = O.getRelocationRelocatedSection(RI); + Value.Offset += RI->getOffset() + OffsetToNextPC + SecI->getAddress(); } void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 36ba8d1b93e7..0d7364f78597 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -95,7 +95,6 @@ protected: /// Make the RelocationValueRef addend PC-relative. 
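The new MIPS PC-relative cases above recover each addend by masking the immediate field out of the instruction word, scaling it back to a byte offset, and sign-extending from the widened bit count (the 16-bit R_MIPS_PC16 field becomes an 18-bit byte offset, the 19-bit field a 21-bit offset, and so on). A small hedged sketch of that decode step; decodePC16Addend is an illustrative name, not a function in the patch:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Decode the signed byte offset carried by a 16-bit, word-scaled branch field,
// using the same mask / shift / SignExtend32 sequence as R_MIPS_PC16 above.
static int32_t decodePC16Addend(uint32_t Insn) {
  uint32_t Field = Insn & 0x0000ffff;        // 16-bit immediate field
  return llvm::SignExtend32<18>(Field << 2); // scaled to bytes, then signed
}

// For instance, a field of 0xffff (-1 words) decodes to a -4 byte offset.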
void makeValueAddendPCRel(RelocationValueRef &Value, - const ObjectFile &BaseTObj, const relocation_iterator &RI, unsigned OffsetToNextPC); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 99fd6e333b47..7bf764114bae 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -284,7 +284,7 @@ public: bool IsExtern = Obj.getPlainRelocationExternal(RelInfo); if (!IsExtern && RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size); + makeValueAddendPCRel(Value, RelI, 1 << RE.Size); RE.Addend = Value.Offset; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 0d9445e84f09..0a24bb2f5eae 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -74,7 +74,7 @@ public: getRelocationValueRef(Obj, RelI, RE, ObjSectionToID)); if (RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 8); + makeValueAddendPCRel(Value, RelI, 8); if ((RE.RelType & 0xf) == MachO::ARM_RELOC_BR24) processBranchRelocation(RE, Value, Stubs); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index aceb304abb1e..569a078d7f3d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -68,7 +68,7 @@ public: // Value.Addend += RelocAddr + 4; // } if (RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size); + makeValueAddendPCRel(Value, RelI, 1 << RE.Size); RE.Addend = Value.Offset; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index 4b3b01ba3c96..dd56e72f9144 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -50,7 +50,7 @@ public: bool IsExtern = Obj.getPlainRelocationExternal(RelInfo); if (!IsExtern && RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size); + makeValueAddendPCRel(Value, RelI, 1 << RE.Size); if (RE.RelType == MachO::X86_64_RELOC_GOT || RE.RelType == MachO::X86_64_RELOC_GOT_LOAD) diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index c3032f4ffc79..546a98670a29 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -190,6 +190,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "sanitize_address"; if (hasAttribute(Attribute::AlwaysInline)) return "alwaysinline"; + if (hasAttribute(Attribute::ArgMemOnly)) + return "argmemonly"; if (hasAttribute(Attribute::Builtin)) return "builtin"; if (hasAttribute(Attribute::ByVal)) @@ -447,6 +449,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { llvm_unreachable("dereferenceable_or_null attribute not supported in raw " "format"); break; + case Attribute::ArgMemOnly: + llvm_unreachable("argmemonly attribute not supported in raw format"); + break; } llvm_unreachable("Unsupported attribute type"); } @@ -1356,7 +1361,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (I == Attribute::Dereferenceable || - I == 
Attribute::DereferenceableOrNull) + I == Attribute::DereferenceableOrNull || + I == Attribute::ArgMemOnly) continue; if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { Attrs[I] = true; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 70a55186ea9a..f1c6ebd4846e 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -229,6 +229,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { NewFn = nullptr; bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); + assert(F != NewFn && "Intrinsic function upgraded to the same function"); // Upgrade intrinsic attributes. This does not change the function. if (NewFn) @@ -710,16 +711,14 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) { // Upgrade the function and check if it is a totaly new function. Function *NewFn; if (UpgradeIntrinsicFunction(F, NewFn)) { - if (NewFn != F) { - // Replace all uses to the old function with the new one if necessary. - for (Value::user_iterator UI = F->user_begin(), UE = F->user_end(); - UI != UE; ) { - if (CallInst *CI = dyn_cast<CallInst>(*UI++)) - UpgradeIntrinsicCall(CI, NewFn); - } - // Remove old function, no longer used, from the module. - F->eraseFromParent(); + // Replace all uses to the old function with the new one if necessary. + for (Value::user_iterator UI = F->user_begin(), UE = F->user_end(); + UI != UE;) { + if (CallInst *CI = dyn_cast<CallInst>(*UI++)) + UpgradeIntrinsicCall(CI, NewFn); } + // Remove old function, no longer used, from the module. + F->eraseFromParent(); } } diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 77cb10d5b6ba..0a0449434a7b 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -163,47 +163,40 @@ CallInst *BasicBlock::getTerminatingMustTailCall() { } Instruction* BasicBlock::getFirstNonPHI() { - BasicBlock::iterator i = begin(); - // All valid basic blocks should have a terminator, - // which is not a PHINode. If we have an invalid basic - // block we'll get an assertion failure when dereferencing - // a past-the-end iterator. - while (isa<PHINode>(i)) ++i; - return &*i; + for (Instruction &I : *this) + if (!isa<PHINode>(I)) + return &I; + return nullptr; } Instruction* BasicBlock::getFirstNonPHIOrDbg() { - BasicBlock::iterator i = begin(); - // All valid basic blocks should have a terminator, - // which is not a PHINode. If we have an invalid basic - // block we'll get an assertion failure when dereferencing - // a past-the-end iterator. - while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i; - return &*i; + for (Instruction &I : *this) + if (!isa<PHINode>(I) && !isa<DbgInfoIntrinsic>(I)) + return &I; + return nullptr; } Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() { - // All valid basic blocks should have a terminator, - // which is not a PHINode. If we have an invalid basic - // block we'll get an assertion failure when dereferencing - // a past-the-end iterator. 
- BasicBlock::iterator i = begin(); - for (;; ++i) { - if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) + for (Instruction &I : *this) { + if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I)) continue; - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i); - if (!II) - break; - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) - break; + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + + return &I; } - return &*i; + return nullptr; } BasicBlock::iterator BasicBlock::getFirstInsertionPt() { - iterator InsertPt = getFirstNonPHI(); + Instruction *FirstNonPHI = getFirstNonPHI(); + if (!FirstNonPHI) + return end(); + + iterator InsertPt = FirstNonPHI; if (isa<LandingPadInst>(InsertPt)) ++InsertPt; return InsertPt; } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 23e923d41126..e0e729d534bd 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1691,6 +1691,14 @@ void LLVMDeleteFunction(LLVMValueRef Fn) { unwrap<Function>(Fn)->eraseFromParent(); } +LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn) { + return wrap(unwrap<Function>(Fn)->getPersonalityFn()); +} + +void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn) { + unwrap<Function>(Fn)->setPersonalityFn(unwrap<Constant>(PersonalityFn)); +} + unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) { if (Function *F = dyn_cast<Function>(unwrap(Fn))) return F->getIntrinsicID(); diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 6a3ff0e8e457..2a90e70af1a3 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -73,37 +73,47 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { } void DIBuilder::finalize() { - if (CUNode) { - CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes)); - - SmallVector<Metadata *, 16> RetainValues; - // Declarations and definitions of the same type may be retained. Some - // clients RAUW these pairs, leaving duplicates in the retained types - // list. Use a set to remove the duplicates while we transform the - // TrackingVHs back into Values. - SmallPtrSet<Metadata *, 16> RetainSet; - for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) - if (RetainSet.insert(AllRetainTypes[I]).second) - RetainValues.push_back(AllRetainTypes[I]); + if (!CUNode) { + assert(!AllowUnresolvedNodes && + "creating type nodes without a CU is not supported"); + return; + } + + CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes)); + + SmallVector<Metadata *, 16> RetainValues; + // Declarations and definitions of the same type may be retained. Some + // clients RAUW these pairs, leaving duplicates in the retained types + // list. Use a set to remove the duplicates while we transform the + // TrackingVHs back into Values. 
+ SmallPtrSet<Metadata *, 16> RetainSet; + for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) + if (RetainSet.insert(AllRetainTypes[I]).second) + RetainValues.push_back(AllRetainTypes[I]); + + if (!RetainValues.empty()) CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues)); - DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); + DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); + if (!AllSubprograms.empty()) CUNode->replaceSubprograms(SPs.get()); - for (auto *SP : SPs) { - if (MDTuple *Temp = SP->getVariables().get()) { - const auto &PV = PreservedVariables.lookup(SP); - SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end()); - DINodeArray AV = getOrCreateArray(Variables); - TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); - } + + for (auto *SP : SPs) { + if (MDTuple *Temp = SP->getVariables().get()) { + const auto &PV = PreservedVariables.lookup(SP); + SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end()); + DINodeArray AV = getOrCreateArray(Variables); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); } + } + if (!AllGVs.empty()) CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs)); + if (!AllImportedModules.empty()) CUNode->replaceImportedEntities(MDTuple::get( VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(), AllImportedModules.end()))); - } // Now that all temp nodes have been replaced or deleted, resolve remaining // cycles. @@ -585,7 +595,7 @@ DILocalVariable *DIBuilder::createLocalVariable( DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { // FIXME: Why getNonCompileUnitScope()? // FIXME: Why is "!Context" okay here? - // FIXME: WHy doesn't this check for a subprogram or lexical block (AFAICT + // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT // the only valid scopes)? DIScope *Context = getNonCompileUnitScope(Scope); @@ -593,7 +603,7 @@ DILocalVariable *DIBuilder::createLocalVariable( VMContext, Tag, cast_or_null<DILocalScope>(Context), Name, File, LineNo, DITypeRef::get(Ty), ArgNo, Flags); if (AlwaysPreserve) { - // The optimizer may remove local variable. If there is an interest + // The optimizer may remove local variables. If there is an interest // to preserve variable info in such situation then stash it in a // named mdnode. DISubprogram *Fn = getDISubprogram(Scope); @@ -857,7 +867,7 @@ void DIBuilder::replaceArrays(DICompositeType *&T, DINodeArray Elements, if (!T->isResolved()) return; - // If "T" is resolved, it may be due to a self-reference cycle. Track the + // If T is resolved, it may be due to a self-reference cycle. Track the // arrays explicitly if they're unresolved, or else the cycles will be // orphaned. 
if (Elements) diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index e3258895ea5e..b6a8bbcbe5fa 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -62,18 +62,14 @@ bool BasicBlockEdge::isSingleEdge() const { // //===----------------------------------------------------------------------===// -TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>); -TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>); - -#define LLVM_COMMA , -TEMPLATE_INSTANTIATION(void llvm::Calculate<Function LLVM_COMMA BasicBlock *>( - DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT LLVM_COMMA - Function &F)); -TEMPLATE_INSTANTIATION( - void llvm::Calculate<Function LLVM_COMMA Inverse<BasicBlock *> >( - DominatorTreeBase<GraphTraits<Inverse<BasicBlock *> >::NodeType> &DT - LLVM_COMMA Function &F)); -#undef LLVM_COMMA +template class llvm::DomTreeNodeBase<BasicBlock>; +template class llvm::DominatorTreeBase<BasicBlock>; + +template void llvm::Calculate<Function, BasicBlock *>( + DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT, Function &F); +template void llvm::Calculate<Function, Inverse<BasicBlock *>>( + DominatorTreeBase<GraphTraits<Inverse<BasicBlock *>>::NodeType> &DT, + Function &F); // dominates - Return true if Def dominates a use in User. This performs // the special checks necessary if Def and User are in the same basic block. diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 78d1adb5e700..f554d590284f 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -39,8 +39,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// // Value Class //===----------------------------------------------------------------------===// -const unsigned Value::NumUserOperandsBits; - static inline Type *checkType(Type *Ty) { assert(Ty && "Value defined with a null type: Error!"); return Ty; diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 3c61165768f8..2a0a4ff393ed 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -184,12 +184,12 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { /// \brief Track unresolved string-based type references. SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs; - /// \brief Whether we've seen a call to @llvm.frameescape in this function + /// \brief Whether we've seen a call to @llvm.localescape in this function /// already. bool SawFrameEscape; - /// Stores the count of how many objects were passed to llvm.frameescape for a - /// given function and the largest index passed to llvm.framerecover. + /// Stores the count of how many objects were passed to llvm.localescape for a + /// given function and the largest index passed to llvm.localrecover. 
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo; public: @@ -438,6 +438,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { Assert(GVar && GVar->getValueType()->isArrayTy(), "Only global arrays can have appending linkage!", GVar); } + + if (GV.isDeclarationForLinker()) + Assert(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV); } void Verifier::visitGlobalVariable(const GlobalVariable &GV) { @@ -1270,7 +1273,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::Cold || I->getKindAsEnum() == Attribute::OptimizeNone || I->getKindAsEnum() == Attribute::JumpTable || - I->getKindAsEnum() == Attribute::Convergent) { + I->getKindAsEnum() == Attribute::Convergent || + I->getKindAsEnum() == Attribute::ArgMemOnly) { if (!isFunction) { CheckFailed("Attribute '" + I->getAsString() + "' only applies to functions!", V); @@ -1528,8 +1532,9 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { const Instruction &CI = *CS.getInstruction(); - Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory(), - "gc.statepoint must read and write memory to preserve " + Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory() && + !CS.onlyAccessesArgMemory(), + "gc.statepoint must read and write all memory to preserve " "reordering restrictions required by safepoint semantics", &CI); @@ -1666,8 +1671,8 @@ void Verifier::verifyFrameRecoverIndices() { unsigned EscapedObjectCount = Counts.second.first; unsigned MaxRecoveredIndex = Counts.second.second; Assert(MaxRecoveredIndex <= EscapedObjectCount, - "all indices passed to llvm.framerecover must be less than the " - "number of arguments passed ot llvm.frameescape in the parent " + "all indices passed to llvm.localrecover must be less than the " + "number of arguments passed ot llvm.localescape in the parent " "function", F); } @@ -2535,10 +2540,6 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Assert(isa<PointerType>(TargetTy), "GEP base pointer is not a vector or a vector of pointers", &GEP); Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); - Assert(GEP.getPointerOperandType()->isVectorTy() == - GEP.getType()->isVectorTy(), - "Vector GEP must return a vector value", &GEP); - SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); @@ -2548,17 +2549,20 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); - if (GEP.getPointerOperandType()->isVectorTy()) { + if (GEP.getType()->isVectorTy()) { // Additional checks for vector GEPs. 
- unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements(); - Assert(GepWidth == GEP.getType()->getVectorNumElements(), - "Vector GEP result width doesn't match operand's", &GEP); + unsigned GEPWidth = GEP.getType()->getVectorNumElements(); + if (GEP.getPointerOperandType()->isVectorTy()) + Assert(GEPWidth == GEP.getPointerOperandType()->getVectorNumElements(), + "Vector GEP result width doesn't match operand's", &GEP); for (unsigned i = 0, e = Idxs.size(); i != e; ++i) { Type *IndexTy = Idxs[i]->getType(); - Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!", - &GEP); - unsigned IndexWidth = IndexTy->getVectorNumElements(); - Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); + if (IndexTy->isVectorTy()) { + unsigned IndexWidth = IndexTy->getVectorNumElements(); + Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); + } + Assert(IndexTy->getScalarType()->isIntegerTy(), + "All GEP indices should be of integer type"); } } visitInstruction(GEP); @@ -3276,32 +3280,32 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "llvm.invariant.end parameter #2 must be a constant integer", CS); break; - case Intrinsic::frameescape: { + case Intrinsic::localescape: { BasicBlock *BB = CS.getParent(); Assert(BB == &BB->getParent()->front(), - "llvm.frameescape used outside of entry block", CS); + "llvm.localescape used outside of entry block", CS); Assert(!SawFrameEscape, - "multiple calls to llvm.frameescape in one function", CS); + "multiple calls to llvm.localescape in one function", CS); for (Value *Arg : CS.args()) { if (isa<ConstantPointerNull>(Arg)) continue; // Null values are allowed as placeholders. auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); Assert(AI && AI->isStaticAlloca(), - "llvm.frameescape only accepts static allocas", CS); + "llvm.localescape only accepts static allocas", CS); } FrameEscapeInfo[BB->getParent()].first = CS.getNumArgOperands(); SawFrameEscape = true; break; } - case Intrinsic::framerecover: { + case Intrinsic::localrecover: { Value *FnArg = CS.getArgOperand(0)->stripPointerCasts(); Function *Fn = dyn_cast<Function>(FnArg); Assert(Fn && !Fn->isDeclaration(), - "llvm.framerecover first " + "llvm.localrecover first " "argument must be function defined in this module", CS); auto *IdxArg = dyn_cast<ConstantInt>(CS.getArgOperand(2)); - Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int", + Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int", CS); auto &Entry = FrameEscapeInfo[Fn]; Entry.second = unsigned( diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 6131c3180249..53ed4175f8e3 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -473,6 +473,9 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, if (def->hasComdat()) attr |= LTO_SYMBOL_COMDAT; + if (isa<GlobalAlias>(def)) + attr |= LTO_SYMBOL_ALIAS; + auto Iter = _defines.insert(Name).first; // fill information structure diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp index cb3278c716e6..b33a22ff0cf8 100644 --- a/lib/LibDriver/LibDriver.cpp +++ b/lib/LibDriver/LibDriver.cpp @@ -56,17 +56,13 @@ public: } -static std::string getOutputPath(llvm::opt::InputArgList *Args) { +static std::string getOutputPath(llvm::opt::InputArgList *Args, + const llvm::NewArchiveIterator &FirstMember) { if (auto *Arg = Args->getLastArg(OPT_out)) return Arg->getValue(); - for (auto *Arg : Args->filtered(OPT_INPUT)) { - if 
(!StringRef(Arg->getValue()).endswith_lower(".obj")) - continue; - SmallString<128> Val = StringRef(Arg->getValue()); - llvm::sys::path::replace_extension(Val, ".lib"); - return Val.str(); - } - llvm_unreachable("internal error"); + SmallString<128> Val = FirstMember.getNew(); + llvm::sys::path::replace_extension(Val, ".lib"); + return Val.str(); } static std::vector<StringRef> getSearchPaths(llvm::opt::InputArgList *Args, @@ -144,7 +140,10 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) { } std::pair<StringRef, std::error_code> Result = - llvm::writeArchive(getOutputPath(&Args), Members, /*WriteSymtab=*/true); + llvm::writeArchive(getOutputPath(&Args, Members[0]), Members, + /*WriteSymtab=*/true, object::Archive::K_GNU, + /*Deterministic*/ true); + if (Result.second) { if (Result.first.empty()) Result.first = ArgsArr[0]; diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 13c5ca9561df..6554d6a9e60e 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMMC MCObjectStreamer.cpp MCObjectWriter.cpp MCRegisterInfo.cpp + MCSchedule.cpp MCSection.cpp MCSectionCOFF.cpp MCSectionELF.cpp diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 9a65a3158972..227c937e8d1b 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -503,7 +503,8 @@ void MCAsmStreamer::EndCOFFSymbolDef() { } void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { - OS << "\t.safeseh\t" << *Symbol; + OS << "\t.safeseh\t"; + Symbol->print(OS, MAI); EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index da6516a4ac92..f53b589e1aea 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -925,7 +925,7 @@ void MCAssembler::Finish() { Fixups = FragWithFixups->getFixups(); Contents = FragWithFixups->getContents(); } else - llvm_unreachable("Unknow fragment with fixups!"); + llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; bool IsPCRel; diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp index 68948d36d65c..5fc2ca44f5d4 100644 --- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -16,6 +16,10 @@ using namespace llvm; +namespace llvm { +class Triple; +} + // This function tries to add a symbolic operand in place of the immediate // Value in the MCInst. The immediate Value has had any PC adjustment made by // the caller. 
If the instruction is a branch instruction then IsBranch is true, @@ -184,7 +188,7 @@ void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, } namespace llvm { -MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, +MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo) { diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp index decc2d84b252..5be2fa1b30b6 100644 --- a/lib/MC/MCInstrDesc.cpp +++ b/lib/MC/MCInstrDesc.cpp @@ -19,7 +19,7 @@ using namespace llvm; -bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, MCSubtargetInfo &STI, +bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) const { if (ComplexDeprecationInfo) return ComplexDeprecationInfo(MI, STI, Info); diff --git a/lib/MC/MCSchedule.cpp b/lib/MC/MCSchedule.cpp new file mode 100644 index 000000000000..f3919427bf05 --- /dev/null +++ b/lib/MC/MCSchedule.cpp @@ -0,0 +1,34 @@ +//===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default scheduling model. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSchedule.h" +#include <type_traits> + +using namespace llvm; + +static_assert(std::is_pod<MCSchedModel>::value, + "We shouldn't have a static constructor here"); +const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth, + DefaultMicroOpBufferSize, + DefaultLoopMicroOpBufferSize, + DefaultLoadLatency, + DefaultHighLatency, + DefaultMispredictPenalty, + false, + true, + 0, + nullptr, + nullptr, + 0, + 0, + nullptr}; diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index ece775c4f08f..9210cf544b16 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -17,42 +17,34 @@ using namespace llvm; -/// InitMCProcessorInfo - Set or change the CPU (optionally supplemented -/// with feature string). Recompute feature bits and scheduling model. 
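The static_assert in the new MCSchedule.cpp pins down why the default model can be a file-scope constant: a const POD aggregate is constant-initialized, so no static constructor runs before main(). A small illustration of the same property with a made-up struct:

#include <type_traits>

struct Config { unsigned Width; bool Enabled; }; // plain aggregate, no constructors
static_assert(std::is_pod<Config>::value,
              "a non-trivial member would silently require a static constructor");

const Config DefaultConfig = {4, true}; // constant-initialized, no startup code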
-void -MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { +static FeatureBitset getFeatures(StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> ProcDesc, + ArrayRef<SubtargetFeatureKV> ProcFeatures) { SubtargetFeatures Features(FS); - FeatureBits = Features.getFeatureBits(CPU, ProcDesc, ProcFeatures); - InitCPUSchedModel(CPU); + return Features.getFeatureBits(CPU, ProcDesc, ProcFeatures); } -void -MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) { +void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { + FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); if (!CPU.empty()) - CPUSchedModel = getSchedModelForCPU(CPU); + CPUSchedModel = &getSchedModelForCPU(CPU); else - CPUSchedModel = MCSchedModel::GetDefaultSchedModel(); + CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); } -void MCSubtargetInfo::InitMCSubtargetInfo( +void MCSubtargetInfo::setDefaultFeatures(StringRef CPU) { + FeatureBits = getFeatures(CPU, "", ProcDesc, ProcFeatures); +} + +MCSubtargetInfo::MCSubtargetInfo( const Triple &TT, StringRef C, StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) { - TargetTriple = TT; - CPU = C; - ProcFeatures = PF; - ProcDesc = PD; - ProcSchedModels = ProcSched; - WriteProcResTable = WPR; - WriteLatencyTable = WL; - ReadAdvanceTable = RA; - - Stages = IS; - OperandCycles = OC; - ForwardingPaths = FP; - + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD), + ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL), + ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { InitMCProcessorInfo(CPU, FS); } @@ -82,8 +74,7 @@ FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) { return FeatureBits; } -MCSchedModel -MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { +const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModels && "Processor machine model not available!"); unsigned NumProcs = ProcDesc.size(); @@ -116,6 +107,6 @@ MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { /// Initialize an InstrItineraryData instance. void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const { - InstrItins = - InstrItineraryData(CPUSchedModel, Stages, OperandCycles, ForwardingPaths); + InstrItins = InstrItineraryData(getSchedModel(), Stages, OperandCycles, + ForwardingPaths); } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index affc57471fdb..125380a9d140 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -19,9 +19,6 @@ using namespace llvm; // Sentinel value for the absolute pseudo section. MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1); -const unsigned MCSymbol::NumCommonAlignmentBits; -const unsigned MCSymbol::NumFlagsBits; - void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name, MCContext &Ctx) { // We may need more space for a Name to account for alignment. 
So allocate diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 54ed954a90d9..d4821196a6cf 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace object; @@ -115,6 +116,23 @@ uint64_t Archive::Child::getRawSize() const { return getHeader()->getSize(); } +ErrorOr<StringRef> Archive::Child::getBuffer() const { + if (!Parent->IsThin) + return StringRef(Data.data() + StartOfFile, getSize()); + ErrorOr<StringRef> Name = getName(); + if (std::error_code EC = Name.getError()) + return EC; + SmallString<128> FullName = + Parent->getMemoryBufferRef().getBufferIdentifier(); + sys::path::remove_filename(FullName); + sys::path::append(FullName, *Name); + ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); + if (std::error_code EC = Buf.getError()) + return EC; + Parent->ThinBuffers.push_back(std::move(*Buf)); + return Parent->ThinBuffers.back()->getBuffer(); +} + Archive::Child Archive::Child::getNext() const { size_t SpaceToSkip = Data.size(); // If it's odd, add 1 to make it even. @@ -162,10 +180,10 @@ ErrorOr<StringRef> Archive::Child::getName() const { + Parent->StringTable->getSize())) return object_error::parse_failed; - // GNU long file names end with a /. + // GNU long file names end with a "/\n". if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { - StringRef::size_type End = StringRef(addr).find('/'); - return StringRef(addr, End); + StringRef::size_type End = StringRef(addr).find('\n'); + return StringRef(addr, End - 1); } return StringRef(addr); } else if (name.startswith("#1/")) { @@ -186,7 +204,10 @@ ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { if (std::error_code EC = NameOrErr.getError()) return EC; StringRef Name = NameOrErr.get(); - return MemoryBufferRef(getBuffer(), Name); + ErrorOr<StringRef> Buf = getBuffer(); + if (std::error_code EC = Buf.getError()) + return EC; + return MemoryBufferRef(*Buf, Name); } ErrorOr<std::unique_ptr<Binary>> @@ -207,7 +228,8 @@ ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { } Archive::Archive(MemoryBufferRef Source, std::error_code &ec) - : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) { + : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()), + StringTable(child_end()), FirstRegular(child_end()) { StringRef Buffer = Data.getBuffer(); // Check for sufficient magic. if (Buffer.startswith(ThinMagic)) { @@ -287,7 +309,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) ++i; if (i == e) { - ec = object_error::parse_failed; + ec = std::error_code(); return; } Name = i->getRawName(); @@ -352,11 +374,11 @@ Archive::child_iterator Archive::child_end() const { } StringRef Archive::Symbol::getName() const { - return Parent->SymbolTable->getBuffer().begin() + StringIndex; + return Parent->getSymbolTable().begin() + StringIndex; } ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { - const char *Buf = Parent->SymbolTable->getBuffer().begin(); + const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; if (Parent->kind() == K_MIPS64) Offsets += sizeof(uint64_t); @@ -420,7 +442,7 @@ Archive::Symbol Archive::Symbol::getNext() const { // and the second being the offset into the archive of the member that // define the symbol. 
After that the next uint32_t is the byte count of // the string table followed by the string table. - const char *Buf = Parent->SymbolTable->getBuffer().begin(); + const char *Buf = Parent->getSymbolTable().begin(); uint32_t RanlibCount = 0; RanlibCount = read32le(Buf) / 8; // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) @@ -437,8 +459,7 @@ Archive::Symbol Archive::Symbol::getNext() const { } } else { // Go to one past next null. - t.StringIndex = - Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; + t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; } ++t.SymbolIndex; return t; @@ -448,7 +469,7 @@ Archive::symbol_iterator Archive::symbol_begin() const { if (!hasSymbolTable()) return symbol_iterator(Symbol(this, 0, 0)); - const char *buf = SymbolTable->getBuffer().begin(); + const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) { uint32_t symbol_count = 0; symbol_count = read32be(buf); @@ -480,7 +501,7 @@ Archive::symbol_iterator Archive::symbol_begin() const { symbol_count = read32le(buf); buf += 4 + (symbol_count * 2); // Skip indices. } - uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); + uint32_t string_start_offset = buf - getSymbolTable().begin(); return symbol_iterator(Symbol(this, 0, string_start_offset)); } @@ -491,7 +512,7 @@ Archive::symbol_iterator Archive::symbol_end() const { } uint32_t Archive::getNumberOfSymbols() const { - const char *buf = SymbolTable->getBuffer().begin(); + const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) return read32be(buf); if (kind() == K_MIPS64) diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 00a56d13bfed..a40901c924ea 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -34,8 +34,6 @@ using namespace llvm; -NewArchiveIterator::NewArchiveIterator() {} - NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I, StringRef Name) : IsNewMember(false), Name(Name), OldI(I) {} @@ -93,8 +91,12 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, } } -static void print32BE(raw_ostream &Out, uint32_t Val) { - support::endian::Writer<support::big>(Out).write(Val); +static void print32(raw_ostream &Out, object::Archive::Kind Kind, + uint32_t Val) { + if (Kind == object::Archive::K_GNU) + support::endian::Writer<support::big>(Out).write(Val); + else + support::endian::Writer<support::little>(Out).write(Val); } static void printRestOfMemberHeader(raw_fd_ostream &Out, @@ -109,18 +111,42 @@ static void printRestOfMemberHeader(raw_fd_ostream &Out, Out << "`\n"; } -static void printMemberHeader(raw_fd_ostream &Out, StringRef Name, - const sys::TimeValue &ModTime, unsigned UID, - unsigned GID, unsigned Perms, unsigned Size) { +static void printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimeValue &ModTime, + unsigned UID, unsigned GID, + unsigned Perms, unsigned Size) { printWithSpacePadding(Out, Twine(Name) + "/", 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } -static void printMemberHeader(raw_fd_ostream &Out, unsigned NameOffset, - const sys::TimeValue &ModTime, unsigned UID, - unsigned GID, unsigned Perms, unsigned Size) { +static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimeValue &ModTime, unsigned UID, + unsigned GID, unsigned Perms, unsigned Size) { + uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); + // Pad so that even 64 bit object files are aligned. 
+ unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); + unsigned NameWithPadding = Name.size() + Pad; + printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, + NameWithPadding + Size); + Out << Name; + assert(PosAfterHeader == Out.tell()); + while (Pad--) + Out.write(uint8_t(0)); +} + +static void +printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, + StringRef Name, + std::vector<unsigned>::iterator &StringMapIndexIter, + const sys::TimeValue &ModTime, unsigned UID, unsigned GID, + unsigned Perms, unsigned Size) { + if (Kind == object::Archive::K_BSD) + return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); + if (Name.size() < 16) + return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); Out << '/'; - printWithSpacePadding(Out, NameOffset, 15); + printWithSpacePadding(Out, *StringMapIndexIter++, 15); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } @@ -152,20 +178,26 @@ static void writeStringTable(raw_fd_ostream &Out, Out.seek(Pos); } +static sys::TimeValue now(bool Deterministic) { + if (!Deterministic) + return sys::TimeValue::now(); + sys::TimeValue TV; + TV.fromEpochTime(0); + return TV; +} + // Returns the offset of the first reference to a member offset. static ErrorOr<unsigned> -writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, +writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, + ArrayRef<NewArchiveIterator> Members, ArrayRef<MemoryBufferRef> Buffers, - std::vector<unsigned> &MemberOffsetRefs) { - unsigned StartOffset = 0; - unsigned MemberNum = 0; - std::string NameBuf; - raw_string_ostream NameOS(NameBuf); - unsigned NumSyms = 0; + std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) { + unsigned HeaderStartOffset = 0; + unsigned BodyStartOffset = 0; + SmallString<128> NameBuf; + raw_svector_ostream NameOS(NameBuf); LLVMContext Context; - for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(), - E = Members.end(); - I != E; ++I, ++MemberNum) { + for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) { MemoryBufferRef MemberBuffer = Buffers[MemberNum]; ErrorOr<std::unique_ptr<object::SymbolicFile>> ObjOrErr = object::SymbolicFile::createSymbolicFile( @@ -174,10 +206,14 @@ writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, continue; // FIXME: check only for "not an object file" errors. 
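printBSDMemberHeader above stores long names with the BSD "#1/<length>" convention: the name is written immediately after the fixed 60-byte header and padded so that the member data which follows stays 8-byte aligned, and the header's size field covers name, padding and data together. A hypothetical helper (not part of ArchiveWriter.cpp) that computes the length written after "#1/":

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MathExtras.h"

// PosAfterName is the offset where member data would start with no padding,
// i.e. current position + 60-byte header + unpadded name length.
static unsigned bsdPaddedNameLength(llvm::StringRef Name, uint64_t PosAfterName) {
  unsigned Pad = unsigned(llvm::OffsetToAlignment(PosAfterName, 8)); // 0..7 zero bytes
  return unsigned(Name.size()) + Pad;
}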
object::SymbolicFile &Obj = *ObjOrErr.get(); - if (!StartOffset) { - printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0); - StartOffset = Out.tell(); - print32BE(Out, 0); + if (!HeaderStartOffset) { + HeaderStartOffset = Out.tell(); + if (Kind == object::Archive::K_GNU) + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); + else + printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); + BodyStartOffset = Out.tell(); + print32(Out, Kind, 0); // number of entries or bytes } for (const object::BasicSymbolRef &S : Obj.symbols()) { @@ -188,35 +224,53 @@ writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, continue; if (Symflags & object::SymbolRef::SF_Undefined) continue; + + unsigned NameOffset = NameOS.tell(); if (auto EC = S.printName(NameOS)) return EC; NameOS << '\0'; - ++NumSyms; MemberOffsetRefs.push_back(MemberNum); - print32BE(Out, 0); + if (Kind == object::Archive::K_BSD) + print32(Out, Kind, NameOffset); + print32(Out, Kind, 0); // member offset } } - Out << NameOS.str(); - if (StartOffset == 0) + if (HeaderStartOffset == 0) return 0; - if (Out.tell() % 2) - Out << '\0'; + StringRef StringTable = NameOS.str(); + if (Kind == object::Archive::K_BSD) + print32(Out, Kind, StringTable.size()); // byte count of the string table + Out << StringTable; + + // ld64 requires the next member header to start at an offset that is + // 4 bytes aligned. + unsigned Pad = OffsetToAlignment(Out.tell(), 4); + while (Pad--) + Out.write(uint8_t(0)); + // Patch up the size of the symbol table now that we know how big it is. unsigned Pos = Out.tell(); - Out.seek(StartOffset - 12); - printWithSpacePadding(Out, Pos - StartOffset, 10); - Out.seek(StartOffset); - print32BE(Out, NumSyms); + const unsigned MemberHeaderSize = 60; + Out.seek(HeaderStartOffset + 48); // offset of the size field. + printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10); + + // Patch up the number of symbols. 
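Taken together, the branches above yield two different symbol-table shapes; the member offsets are zero placeholders at this point and are patched once the real member positions are known, which is what MemberOffsetRefs records. A sketch of the resulting layouts, with 32-bit fields:

  GNU ("/" member, fields big-endian):
    [NumSyms] [MemberOffset x NumSyms] [Name\0 Name\0 ...]

  BSD ("__.SYMDEF" member, fields little-endian):
    [RanlibBytes = NumSyms * 8]
    [(NameOffset, MemberOffset) x NumSyms]
    [StringTableBytes] [Name\0 Name\0 ...]

  Both are then zero-padded so the next member header starts 4-byte aligned,
  which ld64 requires.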
+ Out.seek(BodyStartOffset); + unsigned NumSyms = MemberOffsetRefs.size(); + if (Kind == object::Archive::K_GNU) + print32(Out, Kind, NumSyms); + else + print32(Out, Kind, NumSyms * 8); + Out.seek(Pos); - return StartOffset + 4; + return BodyStartOffset + 4; } -std::pair<StringRef, std::error_code> -llvm::writeArchive(StringRef ArcName, - std::vector<NewArchiveIterator> &NewMembers, - bool WriteSymtab) { +std::pair<StringRef, std::error_code> llvm::writeArchive( + StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic) { SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -267,56 +321,60 @@ llvm::writeArchive(StringRef ArcName, unsigned MemberReferenceOffset = 0; if (WriteSymtab) { - ErrorOr<unsigned> MemberReferenceOffsetOrErr = - writeSymbolTable(Out, NewMembers, Members, MemberOffsetRefs); + ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( + Out, Kind, NewMembers, Members, MemberOffsetRefs, Deterministic); if (auto EC = MemberReferenceOffsetOrErr.getError()) return std::make_pair(ArcName, EC); MemberReferenceOffset = MemberReferenceOffsetOrErr.get(); } std::vector<unsigned> StringMapIndexes; - writeStringTable(Out, NewMembers, StringMapIndexes); + if (Kind != object::Archive::K_BSD) + writeStringTable(Out, NewMembers, StringMapIndexes); unsigned MemberNum = 0; - unsigned LongNameMemberNum = 0; unsigned NewMemberNum = 0; + std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); std::vector<unsigned> MemberOffset; - for (std::vector<NewArchiveIterator>::iterator I = NewMembers.begin(), - E = NewMembers.end(); - I != E; ++I, ++MemberNum) { + for (const NewArchiveIterator &I : NewMembers) { + MemoryBufferRef File = Members[MemberNum++]; unsigned Pos = Out.tell(); MemberOffset.push_back(Pos); - MemoryBufferRef File = Members[MemberNum]; - if (I->isNewMember()) { - StringRef FileName = I->getNew(); + sys::TimeValue ModTime; + unsigned UID; + unsigned GID; + unsigned Perms; + if (Deterministic) { + ModTime.fromEpochTime(0); + UID = 0; + GID = 0; + Perms = 0644; + } else if (I.isNewMember()) { const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum]; - NewMemberNum++; - - StringRef Name = sys::path::filename(FileName); - if (Name.size() < 16) - printMemberHeader(Out, Name, Status.getLastModificationTime(), - Status.getUser(), Status.getGroup(), - Status.permissions(), Status.getSize()); - else - printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++], - Status.getLastModificationTime(), Status.getUser(), - Status.getGroup(), Status.permissions(), - Status.getSize()); + ModTime = Status.getLastModificationTime(); + UID = Status.getUser(); + GID = Status.getGroup(); + Perms = Status.permissions(); } else { - object::Archive::child_iterator OldMember = I->getOld(); - StringRef Name = I->getName(); + object::Archive::child_iterator OldMember = I.getOld(); + ModTime = OldMember->getLastModified(); + UID = OldMember->getUID(); + GID = OldMember->getGID(); + Perms = OldMember->getAccessMode(); + } - if (Name.size() < 16) - printMemberHeader(Out, Name, OldMember->getLastModified(), - OldMember->getUID(), OldMember->getGID(), - OldMember->getAccessMode(), OldMember->getSize()); - else - printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++], - OldMember->getLastModified(), OldMember->getUID(), - OldMember->getGID(), OldMember->getAccessMode(), - OldMember->getSize()); + if (I.isNewMember()) { + 
StringRef FileName = I.getNew(); + const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum++]; + printMemberHeader(Out, Kind, sys::path::filename(FileName), + StringMapIndexIter, ModTime, UID, GID, Perms, + Status.getSize()); + } else { + object::Archive::child_iterator OldMember = I.getOld(); + printMemberHeader(Out, Kind, I.getName(), StringMapIndexIter, ModTime, + UID, GID, Perms, OldMember->getSize()); } Out << File.getBuffer(); @@ -327,8 +385,11 @@ llvm::writeArchive(StringRef ArcName, if (MemberReferenceOffset) { Out.seek(MemberReferenceOffset); - for (unsigned MemberNum : MemberOffsetRefs) - print32BE(Out, MemberOffset[MemberNum]); + for (unsigned MemberNum : MemberOffsetRefs) { + if (Kind == object::Archive::K_BSD) + Out.seek(Out.tell() + 4); // skip over the string offset + print32(Out, Kind, MemberOffset[MemberNum]); + } } Output.keep(); diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 64bb0d5c636d..bcca9839b475 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -154,30 +154,24 @@ ErrorOr<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const { return Result; } -uint64_t COFFObjectFile::getSymbolValue(DataRefImpl Ref) const { - COFFSymbolRef Sym = getCOFFSymbol(Ref); - - if (Sym.isAnyUndefined() || Sym.isCommon()) - return UnknownAddress; - - return Sym.getValue(); +uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const { + return getCOFFSymbol(Ref).getValue(); } -std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, - uint64_t &Result) const { - Result = getSymbolValue(Ref); +ErrorOr<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { + uint64_t Result = getSymbolValue(Ref); COFFSymbolRef Symb = getCOFFSymbol(Ref); int32_t SectionNumber = Symb.getSectionNumber(); if (Symb.isAnyUndefined() || Symb.isCommon() || COFF::isReservedSectionNumber(SectionNumber)) - return std::error_code(); + return Result; const coff_section *Section = nullptr; if (std::error_code EC = getSection(SectionNumber, Section)) return EC; Result += Section->VirtualAddress; - return std::error_code(); + return Result; } SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const { @@ -362,6 +356,8 @@ getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) { relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); const coff_relocation *begin = getFirstReloc(Sec, Data, base()); + if (begin && Sec->VirtualAddress != 0) + report_fatal_error("Sections with relocations should have an address of 0"); DataRefImpl Ret; Ret.p = reinterpret_cast<uintptr_t>(begin); return relocation_iterator(RelocationRef(Ret, this)); @@ -919,19 +915,15 @@ uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const { // whether or not we have an executable image. // // For object files, SizeOfRawData contains the size of section's data; - // VirtualSize is always zero. + // VirtualSize should be zero but isn't due to buggy COFF writers. // // For executables, SizeOfRawData *must* be a multiple of FileAlignment; the // actual section size is in VirtualSize. It is possible for VirtualSize to // be greater than SizeOfRawData; the contents past that point should be // considered to be zero. 
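The getSectionSize change that follows reduces to one rule: for an image (the file has a DOS header) take the smaller of VirtualSize and SizeOfRawData; for a relocatable object trust SizeOfRawData, since object-file VirtualSize cannot be relied on. A condensed, hypothetical restatement of that rule:

#include <algorithm>
#include <cstdint>

uint32_t coffSectionSize(bool IsImage, uint32_t VirtualSize, uint32_t SizeOfRawData) {
  return IsImage ? std::min(VirtualSize, SizeOfRawData) : SizeOfRawData;
}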
- uint32_t SectionSize; - if (Sec->VirtualSize) - SectionSize = std::min(Sec->VirtualSize, Sec->SizeOfRawData); - else - SectionSize = Sec->SizeOfRawData; - - return SectionSize; + if (getDOSHeader()) + return std::min(Sec->VirtualSize, Sec->SizeOfRawData); + return Sec->SizeOfRawData; } std::error_code @@ -961,10 +953,6 @@ void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { reinterpret_cast<const coff_relocation*>(Rel.p) + 1); } -ErrorOr<uint64_t> COFFObjectFile::getRelocationAddress(DataRefImpl Rel) const { - report_fatal_error("getRelocationAddress not implemented in COFFObjectFile"); -} - uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const { const coff_relocation *R = toRel(Rel); return R->VirtualAddress; diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index ecdd468305be..72c232c32870 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -627,6 +627,11 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size())); } +static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Size", Section.Size, Hex64(0)); +} + static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) { commonSectionMapping(IO, Section); IO.mapOptional("Relocations", Section.Relocations); @@ -682,6 +687,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping( Section.reset(new ELFYAML::Group()); groupSectionMapping(IO, *cast<ELFYAML::Group>(Section.get())); break; + case ELF::SHT_NOBITS: + if (!IO.outputting()) + Section.reset(new ELFYAML::NoBitsSection()); + sectionMapping(IO, *cast<ELFYAML::NoBitsSection>(Section.get())); + break; case ELF::SHT_MIPS_ABIFLAGS: if (!IO.outputting()) Section.reset(new ELFYAML::MipsABIFlags()); diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 4255ed717fb9..05900630c75c 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -368,18 +368,12 @@ std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, return std::error_code(); } -uint64_t MachOObjectFile::getSymbolValue(DataRefImpl Sym) const { - uint64_t NValue = getNValue(Sym); - MachO::nlist_base Entry = getSymbolTableEntryBase(this, Sym); - if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) - return UnknownAddress; - return NValue; +uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const { + return getNValue(Sym); } -std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Sym, - uint64_t &Res) const { - Res = getSymbolValue(Sym); - return std::error_code(); +ErrorOr<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const { + return getSymbolValue(Sym); } uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { @@ -392,9 +386,7 @@ uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { } uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { - uint64_t Value; - getSymbolAddress(DRI, Value); - return Value; + return getNValue(DRI); } SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const { @@ -422,9 +414,6 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { uint32_t Result = SymbolRef::SF_None; - if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) - Result |= SymbolRef::SF_Undefined; - if ((MachOType & MachO::N_TYPE) == MachO::N_INDR) Result |= SymbolRef::SF_Indirect; @@ -434,10 +423,10 @@ uint32_t 
MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { if (MachOType & MachO::N_EXT) { Result |= SymbolRef::SF_Global; if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { - uint64_t Value; - getSymbolAddress(DRI, Value); - if (Value && Value != UnknownAddress) + if (getNValue(DRI)) Result |= SymbolRef::SF_Common; + else + Result |= SymbolRef::SF_Undefined; } if (!(MachOType & MachO::N_PEXT)) @@ -593,15 +582,6 @@ void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } -ErrorOr<uint64_t> MachOObjectFile::getRelocationAddress(DataRefImpl Rel) const { - uint64_t Offset = getRelocationOffset(Rel); - - DataRefImpl Sec; - Sec.d.a = Rel.d.a; - uint64_t SecAddress = getSectionAddress(Sec); - return SecAddress + Offset; -} - uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { assert(getHeader().filetype == MachO::MH_OBJECT && "Only implemented for MH_OBJECT"); @@ -932,6 +912,13 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, return std::error_code(); } +section_iterator +MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { + DataRefImpl Sec; + Sec.d.a = Rel->getRawDataRefImpl().d.a; + return section_iterator(SectionRef(Sec, this)); +} + basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { return getSymbolByIndex(0); } diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 945252b21046..5c4b7a67b2ad 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -180,10 +180,10 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { } uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { - uint64_t ret; - if (std::error_code ec = (*unwrap(SI))->getAddress(ret)) - report_fatal_error(ec.message()); - return ret; + ErrorOr<uint64_t> Ret = (*unwrap(SI))->getAddress(); + if (std::error_code EC = Ret.getError()) + report_fatal_error(EC.message()); + return *Ret; } uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { @@ -191,13 +191,6 @@ uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { } // RelocationRef accessors -uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) { - ErrorOr<uint64_t> Ret = (*unwrap(RI))->getAddress(); - if (std::error_code EC = Ret.getError()) - report_fatal_error(EC.message()); - return *Ret; -} - uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { return (*unwrap(RI))->getOffset(); } diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 04e4916f94ef..f82edae89bc6 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -35,6 +35,15 @@ bool SectionRef::containsSymbol(SymbolRef S) const { return *this == *SymSec; } +uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const { + uint32_t Flags = getSymbolFlags(Ref); + if (Flags & SymbolRef::SF_Undefined) + return 0; + if (Flags & SymbolRef::SF_Common) + return getCommonSymbolSize(Ref); + return getSymbolValueImpl(Ref); +} + std::error_code ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { ErrorOr<StringRef> Name = getSymbolName(Symb); diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 4b0a0e5d4819..5d31225396d4 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -52,14 +52,17 @@ namespace llvm { /* Number of bits in the significand. This includes the integer bit. */ unsigned int precision; + + /* Number of bits actually used in the semantics. 
*/ + unsigned int sizeInBits; }; - const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; - const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; - const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; - const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; - const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; - const fltSemantics APFloat::Bogus = { 0, 0, 0 }; + const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; + const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; + const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; + const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; + const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; /* The PowerPC format consists of two doubles. It does not map cleanly onto the usual format above. It is approximated using twice the @@ -72,7 +75,7 @@ namespace llvm { to represent all possible values held by a PPC double-double number, for example: (long double) 1.0 + (long double) 0x1p-106 Should this be replaced by a full emulation of PPC double-double? */ - const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; /* A tight upper bound on number of parts required to hold the value pow(5, power) is @@ -2416,7 +2419,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, roundingMode rounding_mode) { unsigned int parts, pow5PartCount; - fltSemantics calcSemantics = { 32767, -32767, 0 }; + fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; integerPart pow5Parts[maxPowerOfFiveParts]; bool isNearest; @@ -3368,6 +3371,10 @@ APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) } } +unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { + return Sem.sizeInBits; +} + /// Make this number the largest magnitude normal number in the given /// semantics. 
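A possible use of the new sizeInBits field through the accessor added above; the expected values follow the updated table of semantics initializers:

#include "llvm/ADT/APFloat.h"

void floatWidths() {
  unsigned HalfBits = llvm::APFloat::getSizeInBits(llvm::APFloat::IEEEhalf); // 16
  unsigned QuadBits = llvm::APFloat::getSizeInBits(llvm::APFloat::IEEEquad); // 128
  (void)HalfBits;
  (void)QuadBits;
}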
void APFloat::makeLargest(bool Negative) { diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index dcaacf6248d1..17fba95ebb2b 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -46,21 +46,21 @@ using namespace cl; // namespace llvm { namespace cl { -TEMPLATE_INSTANTIATION(class basic_parser<bool>); -TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>); -TEMPLATE_INSTANTIATION(class basic_parser<int>); -TEMPLATE_INSTANTIATION(class basic_parser<unsigned>); -TEMPLATE_INSTANTIATION(class basic_parser<unsigned long long>); -TEMPLATE_INSTANTIATION(class basic_parser<double>); -TEMPLATE_INSTANTIATION(class basic_parser<float>); -TEMPLATE_INSTANTIATION(class basic_parser<std::string>); -TEMPLATE_INSTANTIATION(class basic_parser<char>); - -TEMPLATE_INSTANTIATION(class opt<unsigned>); -TEMPLATE_INSTANTIATION(class opt<int>); -TEMPLATE_INSTANTIATION(class opt<std::string>); -TEMPLATE_INSTANTIATION(class opt<char>); -TEMPLATE_INSTANTIATION(class opt<bool>); +template class basic_parser<bool>; +template class basic_parser<boolOrDefault>; +template class basic_parser<int>; +template class basic_parser<unsigned>; +template class basic_parser<unsigned long long>; +template class basic_parser<double>; +template class basic_parser<float>; +template class basic_parser<std::string>; +template class basic_parser<char>; + +template class opt<unsigned>; +template class opt<int>; +template class opt<std::string>; +template class opt<char>; +template class opt<bool>; } } // end namespace llvm::cl diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 92be0e047f62..c6646fb101b7 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -1165,6 +1165,122 @@ Triple Triple::get64BitArchVariant() const { return T; } +Triple Triple::getBigEndianArchVariant() const { + Triple T(*this); + switch (getArch()) { + case Triple::UnknownArch: + case Triple::amdgcn: + case Triple::amdil64: + case Triple::amdil: + case Triple::hexagon: + case Triple::hsail64: + case Triple::hsail: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: + case Triple::r600: + case Triple::shave: + case Triple::spir64: + case Triple::spir: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: + case Triple::x86_64: + case Triple::xcore: + + // ARM is intentionally unsupported here, changing the architecture would + // drop any arch suffixes. + case Triple::arm: + case Triple::thumb: + T.setArch(UnknownArch); + break; + + case Triple::aarch64_be: + case Triple::armeb: + case Triple::bpfeb: + case Triple::mips64: + case Triple::mips: + case Triple::ppc64: + case Triple::ppc: + case Triple::sparc: + case Triple::sparcv9: + case Triple::systemz: + case Triple::tce: + case Triple::thumbeb: + // Already big endian. 
+ break; + + case Triple::aarch64: T.setArch(Triple::aarch64_be); break; + case Triple::bpfel: T.setArch(Triple::bpfeb); break; + case Triple::mips64el:T.setArch(Triple::mips64); break; + case Triple::mipsel: T.setArch(Triple::mips); break; + case Triple::ppc64le: T.setArch(Triple::ppc64); break; + case Triple::sparcel: T.setArch(Triple::sparc); break; + } + return T; +} + +Triple Triple::getLittleEndianArchVariant() const { + Triple T(*this); + switch (getArch()) { + case Triple::UnknownArch: + case Triple::ppc: + case Triple::sparcv9: + case Triple::systemz: + case Triple::tce: + + // ARM is intentionally unsupported here, changing the architecture would + // drop any arch suffixes. + case Triple::armeb: + case Triple::thumbeb: + T.setArch(UnknownArch); + break; + + case Triple::aarch64: + case Triple::amdgcn: + case Triple::amdil64: + case Triple::amdil: + case Triple::arm: + case Triple::bpfel: + case Triple::hexagon: + case Triple::hsail64: + case Triple::hsail: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::mips64el: + case Triple::mipsel: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: + case Triple::ppc64le: + case Triple::r600: + case Triple::shave: + case Triple::sparcel: + case Triple::spir64: + case Triple::spir: + case Triple::thumb: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: + case Triple::x86_64: + case Triple::xcore: + // Already little endian. + break; + + case Triple::aarch64_be: T.setArch(Triple::aarch64); break; + case Triple::bpfeb: T.setArch(Triple::bpfel); break; + case Triple::mips64: T.setArch(Triple::mips64el); break; + case Triple::mips: T.setArch(Triple::mipsel); break; + case Triple::ppc64: T.setArch(Triple::ppc64le); break; + case Triple::sparc: T.setArch(Triple::sparcel); break; + } + return T; +} + const char *Triple::getARMCPUForArch(StringRef MArch) const { if (MArch.empty()) MArch = getArchName(); diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 6e982bf1da19..c9a31b64cfd3 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1648,7 +1648,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { } OS << " {"; - const std::vector<Record*> &SC = R.getSuperClasses(); + ArrayRef<Record *> SC = R.getSuperClasses(); if (!SC.empty()) { OS << "\t//"; for (const Record *Super : SC) diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index 92f5b2dd7172..07c538159dcb 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -302,7 +302,7 @@ const RecVec *SetTheory::expand(Record *Set) { return &I->second; // This is the first time we see Set. Find a suitable expander. - const std::vector<Record*> &SC = Set->getSuperClasses(); + ArrayRef<Record *> SC = Set->getSuperClasses(); for (unsigned i = 0, e = SC.size(); i != e; ++i) { // Skip unnamed superclasses. if (!dyn_cast<StringInit>(SC[i]->getNameInit())) diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 15df25aea50e..5c36fda2e1ca 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -184,7 +184,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { // Since everything went well, we can now set the "superclass" list for the // current record. 
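A possible use of the Triple::getBigEndianArchVariant / getLittleEndianArchVariant helpers introduced above (the triple string is illustrative):

#include "llvm/ADT/Triple.h"

void flipEndianness() {
  llvm::Triple T("mipsel-unknown-linux-gnu");
  llvm::Triple BE = T.getBigEndianArchVariant();     // arch becomes mips
  llvm::Triple LE = BE.getLittleEndianArchVariant(); // and back to mipsel
  (void)LE;
}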
- const std::vector<Record*> &SCs = SC->getSuperClasses(); + ArrayRef<Record *> SCs = SC->getSuperClasses(); ArrayRef<SMRange> SCRanges = SC->getSuperClassRanges(); for (unsigned i = 0, e = SCs.size(); i != e; ++i) { if (CurRec->isSubClassOf(SCs[i])) diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index bffd9e6e8c76..79a84ad8c6c5 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -510,9 +510,17 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, if (J.isRegMask()) AvailableRegs.clearBitsNotInMask(J.getRegMask()); - if (J.isReg() && J.isDef() && AvailableRegs[J.getReg()]) { - assert(J.isDead() && "Non-dead def should have been removed by now!"); - AvailableRegs.reset(J.getReg()); + if (J.isReg() && J.isDef()) { + MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true); + if (J.isDead()) + for (; AI.isValid(); ++AI) + AvailableRegs.reset(*AI); +#ifndef NDEBUG + else + for (; AI.isValid(); ++AI) + assert(!AvailableRegs[*AI] && + "Non-dead def should have been removed by now!"); +#endif } } } @@ -585,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, if (Change) { Substs[MO.getReg()] = Reg; MO.setReg(Reg); - MRI->setPhysRegUsed(Reg); Changed = true; } diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 4691e949838d..815ebef177d8 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -40,6 +40,11 @@ def CC_AArch64_AAPCS : CallingConv<[ // slot is 64-bit. CCIfByVal<CCPassByVal<8, 8>>, + // The 'nest' parameter, if any, is passed in X18. + // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest<CCAssignToReg<[X18]>>, + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index c19fcdc4bb18..072819836bb3 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -310,7 +310,7 @@ CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { } unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && + assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && "Alloca should always return a pointer."); // Don't handle dynamic allocas. @@ -420,7 +420,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); - EVT DestEVT = TLI.getValueType(GV->getType(), true); + EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); if (!DestEVT.isSimple()) return 0; @@ -459,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { } unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -538,13 +538,14 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) } case Instruction::IntToPtr: { // Look past no-op inttoptrs. 
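The repeated AArch64FastISel edits around this point all follow one API migration: TargetLowering's type queries now take the module's DataLayout explicitly instead of fetching it internally. The pattern, where TLI and DL stand for the lowering object and DataLayout already available in FastISel:

  EVT VT  = TLI.getValueType(DL, Ty);   // was: TLI.getValueType(Ty)
  MVT PTy = TLI.getPointerTy(DL);       // was: TLI.getPointerTy()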
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } case Instruction::PtrToInt: { // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } @@ -879,13 +880,13 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. - if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } @@ -906,7 +907,7 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) @@ -1390,7 +1391,7 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { Type *Ty = LHS->getType(); - EVT EVT = TLI.getValueType(Ty, true); + EVT EVT = TLI.getValueType(DL, Ty, true); if (!EVT.isSimple()) return false; MVT VT = EVT.getSimpleVT(); @@ -2761,7 +2762,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { if (SrcReg == 0) return false; - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); if (SrcVT == MVT::f128) return false; @@ -2797,7 +2798,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { return false; bool SrcIsKill = hasTrivialKill(I->getOperand(0)); - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { @@ -2856,7 +2857,7 @@ bool AArch64FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; @@ -2898,7 +2899,7 @@ bool AArch64FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); unsigned SrcReg; const TargetRegisterClass *RC; if (VT >= MVT::i1 && VT <= MVT::i32) { @@ -3689,7 +3690,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; @@ -3724,7 +3725,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; @@ -3772,8 +3773,8 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); Type *SrcTy = Op->getType(); - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); + EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); + EVT DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -4459,7 +4460,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) { } bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); + EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -4825,7 +4826,7 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); @@ -4849,7 +4850,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { // into a single N = N + TotalOffset. uint64_t TotalOffs = 0; Type *Ty = I->getOperand(0)->getType(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DL); for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) { const Value *Idx = *OI; if (auto *StTy = dyn_cast<StructType>(Ty)) { diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 3ba7e70a102d..a7817f4f67dd 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,12 +349,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Allocate space for the rest of the frame. const unsigned Alignment = MFI->getMaxAlignment(); - const bool NeedsRealignment = (Alignment > 16); + const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; - if (NeedsRealignment) { - // Use the first callee-saved register as a scratch register - assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && - "No scratch register to align SP!"); + if (NumBytes && NeedsRealignment) { + // Use the first callee-saved register as a scratch register. 
scratchSPReg = AArch64::X9; } @@ -366,9 +364,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); - assert(!(NeedsRealignment && NumBytes==0) && - "NumBytes should never be 0 when realignment is needed"); - if (NumBytes && NeedsRealignment) { const unsigned NrBitsToZero = countTrailingZeros(Alignment); assert(NrBitsToZero > 1); @@ -881,28 +876,34 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } -void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { +void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - MachineRegisterInfo *MRI = &MF.getRegInfo(); SmallVector<unsigned, 4> UnspilledCSGPRs; SmallVector<unsigned, 4> UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { - MRI->setPhysRegUsed(AArch64::FP); - MRI->setPhysRegUsed(AArch64::LR); + SavedRegs.set(AArch64::FP); + SavedRegs.set(AArch64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the // getCalleeSavedRegs() below will get the right answer. if (RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + SavedRegs.set(RegInfo->getBaseRegister()); if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(AArch64::X9); + SavedRegs.set(AArch64::X9); // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; @@ -924,8 +925,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( AArch64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); - const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); - const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg); + const bool OddRegUsed = SavedRegs.test(OddReg); + const bool EvenRegUsed = SavedRegs.test(EvenReg); // Early exit if none of the registers in the register pair is actually // used. @@ -946,7 +947,7 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (OddRegUsed ^ EvenRegUsed) { // Find out which register is the additional spill. Reg = OddRegUsed ? 
EvenReg : OddReg; - MRI->setPhysRegUsed(Reg); + SavedRegs.set(Reg); } DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); @@ -1001,7 +1002,7 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( UnspilledCSGPRs.pop_back(); DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) << " to get a scratch register.\n"); - MRI->setPhysRegUsed(Reg); + SavedRegs.set(Reg); ExtraCSSpill = true; ++Count; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index b496fccba349..731f031ff855 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -59,8 +59,8 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 1ea4abcf05fa..772e894f4f0a 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -610,10 +610,11 @@ static bool isWorthFoldingADDlow(SDValue N) { bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); + const DataLayout &DL = CurDAG->getDataLayout(); const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } @@ -628,10 +629,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, const GlobalValue *GV = GAN->getGlobal(); unsigned Alignment = GV->getAlignment(); - const DataLayout *DL = TLI->getDataLayout(); Type *Ty = GV->getType()->getElementType(); if (Alignment == 0 && Ty->isSized()) - Alignment = DL->getABITypeAlignment(Ty); + Alignment = DL.getABITypeAlignment(Ty); if (Alignment >= Size) return true; @@ -645,7 +645,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; @@ -688,7 +688,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); const TargetLowering *TLI = getTargetLowering(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); return true; @@ -1494,7 +1495,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, } static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB, + unsigned &Immr, unsigned &Imms, bool BiggerPattern) { assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "N must be a SHR/SRA operation to call this function"); @@ -1508,7 
+1509,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, "Type checking must have been done before calling this function"); // Check for AND + SRL doing several bits extract. - if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) + if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) return true; // we're looking for a shift of a shift @@ -1548,13 +1549,9 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!"); - // Note: The width operand is encoded as width-1. - unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; - int sLSB = Srl_imm - Shl_imm; - if (sLSB < 0) - return false; - LSB = sLSB; - MSB = LSB + Width; + int immr = Srl_imm - Shl_imm; + Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; + Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1; // SRA requires a signed extraction if (VT == MVT::i32) Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; @@ -1564,7 +1561,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, } static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, - SDValue &Opd0, unsigned &LSB, unsigned &MSB, + SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits = 0, bool BiggerPattern = false) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) @@ -1576,11 +1573,11 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, return false; break; case ISD::AND: - return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, + return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, NumberOfIgnoredLowBits, BiggerPattern); case ISD::SRL: case ISD::SRA: - return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); + return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); } unsigned NOpc = N->getMachineOpcode(); @@ -1593,8 +1590,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); - LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); + Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); return true; } // Unreachable @@ -1602,9 +1599,9 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, } SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { - unsigned Opc, LSB, MSB; + unsigned Opc, Immr, Imms; SDValue Opd0; - if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) + if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) return nullptr; EVT VT = N->getValueType(0); @@ -1613,8 +1610,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { // If the bit extract operation is 64bit but the original type is 32bit, we // need to add one EXTRACT_SUBREG. 
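The bitfield-extract hunk above stops computing an LSB/MSB pair (where the width operand was encoded as width-1) and instead computes the SBFM/UBFM immr/imms fields directly: immr is the right-shift amount minus the left-shift amount, wrapped by the register width when negative, and imms is the index of the highest bit kept. A small arithmetic sketch of that computation for the Trunc_bits == 0 case, in plain C++ without LLVM types:

```cpp
// Arithmetic sketch of the new immr/imms encoding (Trunc_bits == 0 case).
#include <cassert>

struct BitfieldImm { unsigned Immr, Imms; };

BitfieldImm computeBitfieldImm(unsigned Width, unsigned ShlImm, unsigned SrlImm) {
  int immr = static_cast<int>(SrlImm) - static_cast<int>(ShlImm);
  // A negative rotate amount wraps around the register width.
  unsigned Immr = immr < 0 ? static_cast<unsigned>(immr + static_cast<int>(Width))
                           : static_cast<unsigned>(immr);
  // imms encodes the highest bit kept, not LSB + (width - 1) as before.
  unsigned Imms = Width - ShlImm - 1;
  return {Immr, Imms};
}

int main() {
  // (x << 8) >> 4 on a 32-bit value: immr = (4 - 8) mod 32 = 28, imms = 23.
  BitfieldImm I = computeBitfieldImm(32, 8, 4);
  assert(I.Immr == 28 && I.Imms == 23);
}
```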
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { - SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64), - CurDAG->getTargetConstant(MSB, dl, MVT::i64)}; + SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), + CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); @@ -1624,8 +1621,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { return Node; } - SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT), - CurDAG->getTargetConstant(MSB, dl, VT)}; + SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), + CurDAG->getTargetConstant(Imms, dl, VT)}; return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } @@ -2351,7 +2348,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { int FI = cast<FrameIndexSDNode>(Node)->getIndex(); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); const TargetLowering *TLI = getTargetLowering(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); SDLoc DL(Node); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index f3242cdd971d..3e8f46cf1ecd 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -705,7 +705,8 @@ void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v4i32); } -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -774,7 +775,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( } } -MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { +MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, + EVT) const { return MVT::i64; } @@ -1710,7 +1712,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + SDValue Callee = + DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2089,7 +2092,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( CurArgIdx = Ins[i].getOrigArgIndex(); // Get type of the original argument. - EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); + EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(), + /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) @@ -2111,7 +2115,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( if (Ins[i].Flags.isByVal()) { // Byval is used for HFAs in the PCS, but the system should work in a // non-compliant manner for larger structs. 
- EVT PtrTy = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); int Size = Ins[i].Flags.getByValSize(); unsigned NumRegs = (Size + 7) / 8; @@ -2119,7 +2123,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // case. It should also work for fundamental types too. unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT); InVals.push_back(FrameIdxN); continue; @@ -2186,7 +2190,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true); // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) @@ -2265,6 +2269,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SmallVector<SDValue, 8> MemOps; @@ -2279,7 +2284,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, if (GPRSaveSize != 0) { GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); @@ -2288,8 +2293,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, DAG.getStore(Val.getValue(1), DL, Val, FIN, MachinePointerInfo::getStack(i * 8), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, DL, getPointerTy())); + FIN = + DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); } } FuncInfo->setVarArgsGPRIndex(GPRIdx); @@ -2307,7 +2312,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, if (FPRSaveSize != 0) { FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT); for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); @@ -2317,8 +2322,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, DAG.getStore(Val.getValue(1), DL, Val, FIN, MachinePointerInfo::getStack(i * 16), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, DL, getPointerTy())); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getConstant(16, DL, PtrVT)); } } FuncInfo->setVarArgsFPRIndex(FPRIdx); @@ -2614,7 +2619,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, for (unsigned i = 0; i != NumArgs; ++i) { MVT ValVT = Outs[i].VT; // Get type of the original argument. - EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty, + EVT ActualVT = getValueType(DAG.getDataLayout(), + CLI.getArgs()[Outs[i].OrigArgIndex].Ty, /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? 
ActualVT.getSimpleVT() : ValVT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; @@ -2674,10 +2680,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, true), DL); - SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy()); + SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, + getPointerTy(DAG.getDataLayout())); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; @@ -2743,13 +2751,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned LocMemOffset = VA.getLocMemOffset(); int32_t Offset = LocMemOffset + BEAlign; SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); - PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); if (IsTailCall) { Offset = Offset + FPDiff; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstAddr = DAG.getFrameIndex(FI, PtrVT); DstInfo = MachinePointerInfo::getFixedStack(FI); // Make sure any stack arguments overlapping with where we're storing @@ -2759,7 +2767,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } else { SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); - DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); DstInfo = MachinePointerInfo::getStack(LocMemOffset); } @@ -2809,25 +2817,24 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, const GlobalValue *GV = G->getGlobal(); bool InternalLinkage = GV->hasInternalLinkage(); if (InternalLinkage) - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); else { - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, - AArch64II::MO_GOT); - Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); + Callee = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); - Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT); - Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } // We don't usually want to end the call-sequence here because we would tidy @@ -2977,7 +2984,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); const GlobalAddressSDNode *GN = 
cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GN->getGlobal(); @@ -3069,7 +3076,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); SDLoc DL(Op); - MVT PtrVT = getPointerTy(); + MVT PtrVT = getPointerTy(DAG.getDataLayout()); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue TLVPAddr = @@ -3124,7 +3131,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// the sequence is produced as per above. SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -3159,7 +3166,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, } SDValue TPOff; - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); const GlobalValue *GV = GA->getGlobal(); @@ -3786,7 +3793,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && @@ -3812,7 +3819,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { @@ -3853,7 +3860,7 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { @@ -3879,8 +3886,8 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); SDLoc DL(Op); - SDValue FR = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), + getPointerTy(DAG.getDataLayout())); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); @@ -3892,6 +3899,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, // Standard, section B.3. 
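The LowerAAPCS_VASTART hunk that follows writes the five fields of the AAPCS64 va_list at offsets 0, 8, 16, 24 and 28, now through a single hoisted PtrVT. For reference, this is the C-level layout it materializes (field names per AAPCS64 section B.3; the static_asserts assume an LP64 host, and the ABI's leading underscores are dropped because such identifiers are reserved in C++):

```cpp
// The va_list layout targeted by LowerAAPCS_VASTART.
#include <cstddef>

struct AAPCS64VaList {
  void *stack;   // __stack:   next stacked argument             (offset 0)
  void *gr_top;  // __gr_top:  end of the GPR register save area (offset 8)
  void *vr_top;  // __vr_top:  end of the FP/SIMD save area      (offset 16)
  int gr_offs;   // __gr_offs: negative offset to next GPR arg   (offset 24)
  int vr_offs;   // __vr_offs: negative offset to next FP arg    (offset 28)
};

static_assert(offsetof(AAPCS64VaList, gr_top) == 8, "LP64 host assumed");
static_assert(offsetof(AAPCS64VaList, vr_top) == 16, "LP64 host assumed");
static_assert(offsetof(AAPCS64VaList, gr_offs) == 24, "LP64 host assumed");
static_assert(offsetof(AAPCS64VaList, vr_offs) == 28, "LP64 host assumed");

int main() {}
```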
MachineFunction &MF = DAG.getMachineFunction(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); @@ -3900,8 +3908,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SmallVector<SDValue, 4> MemOps; // void *__stack at offset 0 - SDValue Stack = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); + SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT); MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, MachinePointerInfo(SV), false, false, 8)); @@ -3910,12 +3917,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, if (GPRSize > 0) { SDValue GRTop, GRTopAddr; - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, DL, getPointerTy())); + GRTopAddr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT)); - GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, DL, getPointerTy())); + GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT); + GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop, + DAG.getConstant(GPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, MachinePointerInfo(SV, 8), false, false, 8)); @@ -3925,28 +3932,28 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, int FPRSize = FuncInfo->getVarArgsFPRSize(); if (FPRSize > 0) { SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, DL, getPointerTy())); + VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(16, DL, PtrVT)); - VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, DL, getPointerTy())); + VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT); + VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop, + DAG.getConstant(FPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, MachinePointerInfo(SV, 16), false, false, 8)); } // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, DL, getPointerTy())); + SDValue GROffsAddr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr, MachinePointerInfo(SV, 24), false, false, 4)); // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, DL, getPointerTy())); + SDValue VROffsAddr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr, MachinePointerInfo(SV, 28), false, @@ -3987,21 +3994,22 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr, - MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V), + false, false, false, 0); Chain = VAList.getValue(1); if (Align > 8) { assert(((Align & (Align - 1)) == 0) 
&& "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(Align - 1, DL, getPointerTy())); - VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, - DAG.getConstant(-(int64_t)Align, DL, getPointerTy())); + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-(int64_t)Align, DL, PtrVT)); } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); + uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the @@ -4016,8 +4024,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { } // Increment the pointer, VAList, to the next vaarg - SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(ArgSize, DL, getPointerTy())); + SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(ArgSize, DL, PtrVT)); // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), false, false, 0); @@ -4057,8 +4065,8 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { unsigned Reg = StringSwitch<unsigned>(RegName) .Case("sp", AArch64::SP) .Default(0); @@ -4079,7 +4087,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, DL, getPointerTy()); + SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout())); return DAG.getLoad(VT, DL, DAG.getEntryNode(), DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); @@ -4232,7 +4240,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
AArch64TargetLowering::ConstraintType -AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { +AArch64TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -4283,8 +4291,7 @@ AArch64TargetLowering::getSingleConstraintMatchWeight( std::pair<unsigned, const TargetRegisterClass *> AArch64TargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -4320,10 +4327,9 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( unsigned Size = Constraint.size(); if ((Size == 4 || Size == 5) && Constraint[0] == '{' && tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { - const std::string Reg = - std::string(&Constraint[2], &Constraint[Size - 1]); - int RegNo = atoi(Reg.c_str()); - if (RegNo >= 0 && RegNo <= 31) { + int RegNo; + bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo); + if (!Failed && RegNo >= 0 && RegNo <= 31) { // v0 - v31 are aliases of q0 - q31. // By default we'll emit v0-v31 for this unless there's a modifier where // we'll emit the correct register as well. @@ -6429,6 +6435,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { + auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: @@ -6444,7 +6451,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. 
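In getRegForInlineAsmConstraint above, the digits of a "{v0}".."{v31}" constraint are now parsed with StringRef::getAsInteger, which reports failure, instead of copying into a std::string and calling atoi, which cannot. A standalone sketch of the same error-checked parse using std::string_view and std::from_chars rather than LLVM's StringRef:

```cpp
// Error-checked parse of a "{vN}" inline-asm constraint (N in 0..31).
// The real code also accepts an uppercase 'V' via tolower; omitted here.
#include <cassert>
#include <charconv>
#include <string_view>

bool parseVRegConstraint(std::string_view Constraint, int &RegNo) {
  size_t Size = Constraint.size();
  if ((Size != 4 && Size != 5) || Constraint.front() != '{' ||
      Constraint[1] != 'v' || Constraint.back() != '}')
    return false;
  std::string_view Digits = Constraint.substr(2, Size - 3);
  auto [Ptr, Ec] =
      std::from_chars(Digits.data(), Digits.data() + Digits.size(), RegNo);
  return Ec == std::errc() && Ptr == Digits.data() + Digits.size() &&
         RegNo >= 0 && RegNo <= 31;
}

int main() {
  int R;
  assert(parseVRegConstraint("{v7}", R) && R == 7);
  assert(parseVRegConstraint("{v31}", R) && R == 31);
  assert(!parseVRegConstraint("{vx}", R));   // atoi() would silently return 0 here
}
```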
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; @@ -6470,7 +6477,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); @@ -6488,7 +6495,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; @@ -6501,7 +6508,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; @@ -6572,7 +6579,8 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { return true; const TargetOptions &Options = getTargetMachine().Options; - EVT VT = getValueType(User->getOperand(0)->getType()); + const DataLayout &DL = I->getModule()->getDataLayout(); + EVT VT = getValueType(DL, User->getOperand(0)->getType()); if (isFMAFasterThanFMulAndFAdd(VT) && isOperationLegalOrCustom(ISD::FMA, VT) && @@ -6637,6 +6645,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { break; case Instruction::GetElementPtr: { gep_type_iterator GTI = gep_type_begin(Instr); + auto &DL = Ext->getModule()->getDataLayout(); std::advance(GTI, U.getOperandNo()); Type *IdxTy = *GTI; // This extension will end up with a shift because of the scaling factor. @@ -6644,7 +6653,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. if (ShiftAmt == 0 || ShiftAmt > 4) @@ -6708,10 +6717,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad( assert(Shuffles.size() == Indices.size() && "Unmatched number of shufflevectors and indices"); - const DataLayout *DL = getDataLayout(); + const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); + unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); // Skip illegal vector types. if (VecSize != 64 && VecSize != 128) @@ -6721,8 +6730,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // load integer vectors first and then convert to pointer vectors. 
Type *EltTy = VecTy->getVectorElementType(); if (EltTy->isPointerTy()) - VecTy = VectorType::get(DL->getIntPtrType(EltTy), - VecTy->getVectorNumElements()); + VecTy = + VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace()); Type *Tys[2] = {VecTy, PtrTy}; @@ -6796,8 +6805,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); - const DataLayout *DL = getDataLayout(); - unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); + const DataLayout &DL = SI->getModule()->getDataLayout(); + unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); // Skip illegal vector types. if (SubVecSize != 64 && SubVecSize != 128) @@ -6810,7 +6819,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { - Type *IntTy = DL->getIntPtrType(EltTy); + Type *IntTy = DL.getIntPtrType(EltTy); unsigned NumOpElts = dyn_cast<VectorType>(Op0->getType())->getVectorNumElements(); @@ -6894,8 +6903,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // AArch64 has five basic addressing modes: // reg @@ -6916,7 +6925,7 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 uint64_t NumBytes = 0; if (Ty->isSized()) { - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); + uint64_t NumBits = DL.getTypeSizeInBits(Ty); NumBytes = NumBits / 8; if (!isPowerOf2_64(NumBits)) NumBytes = 0; @@ -6946,8 +6955,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, return false; } -int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty, +int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // Operands | Rt Latency @@ -6956,7 +6965,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, // ------------------------------------------- // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty, AS)) + if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 if // it is not equal to 0 or 1. return AM.Scale != 0 && AM.Scale != 1; diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 46298c0e7de1..c73ce1e54b3e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -233,7 +233,7 @@ public: APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. 
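isLegalAddressingMode above now sizes the access from the DataLayout passed in; the immediate forms it accepts are reg + imm9 (a signed 9-bit unscaled offset) and reg + SIZE_IN_BYTES * uimm12 (an unsigned 12-bit offset scaled by the access size). A hedged standalone sketch of that offset check, not the LLVM routine itself:

```cpp
// The two immediate addressing forms named in the comment above:
// a signed 9-bit unscaled offset, or an unsigned 12-bit multiple of the size.
#include <cassert>
#include <cstdint>

bool isLegalLoadStoreOffset(int64_t Offs, uint64_t NumBytes) {
  // reg + imm9: any byte offset in [-256, 255].
  if (Offs >= -256 && Offs <= 255)
    return true;
  // reg + SIZE_IN_BYTES * uimm12: non-negative multiple of the access size.
  if (NumBytes && Offs >= 0 && Offs % static_cast<int64_t>(NumBytes) == 0 &&
      Offs / static_cast<int64_t>(NumBytes) <= 4095)
    return true;
  return false;
}

int main() {
  assert(isLegalLoadStoreOffset(-16, 8));     // fits imm9
  assert(isLegalLoadStoreOffset(32760, 8));   // 4095 * 8, fits scaled uimm12
  assert(!isLegalLoadStoreOffset(32768, 8));  // 4096 * 8, out of range
  assert(!isLegalLoadStoreOffset(300, 8));    // too big for imm9, not a multiple of 8
}
```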
@@ -278,7 +278,8 @@ public: bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override; /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; @@ -323,7 +324,7 @@ public: /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// \brief Return the cost of the scaling factor used in the addressing @@ -331,7 +332,7 @@ public: /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty, + int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster @@ -471,9 +472,9 @@ private: std::vector<SDNode *> *Created) const override; bool combineRepeatedFPDivisors(unsigned NumUsers) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -483,14 +484,12 @@ private: std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index b73e0958df90..fa1a46acba84 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -894,6 +894,8 @@ def REVXr : OneXRegData<0b011, "rev", bswap>; def REV32Xr : OneXRegData<0b010, "rev32", UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; +def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; + // The bswap commutes with the rotr so we want a pattern for both possible // orders. 
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; @@ -5283,18 +5285,23 @@ def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; @@ -5309,12 +5316,16 @@ def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1836682e386e..841af55f7a65 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -90,7 +90,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, BitVector AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); @@ -119,7 +119,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); switch (Reg) { default: @@ -198,11 +198,9 @@ bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget() - .getSubtargetImpl(*MF.getFunction()) - ->getFrameLowering() - ->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -213,8 +211,7 @@ AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - + const AArch64FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP; } @@ -280,7 +277,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); MachineFrameInfo *MFI = MF.getFrameInfo(); // Estimate an offset from the frame pointer. 
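The register-info hunks above and below replace MF.getSubtarget().getFrameLowering() plus a static_cast at each call site with a single typed getFrameLowering(MF) accessor that already returns the AArch64FrameLowering. A standalone sketch of that cleanup, with illustrative stand-in names rather than LLVM's classes:

```cpp
// Stand-in classes only; the point is the single typed accessor that replaces a
// static_cast at every call site.
#include <cassert>

struct TargetFrameLoweringBase {
  virtual ~TargetFrameLoweringBase() = default;
};

struct AArch64LikeFrameLowering : TargetFrameLoweringBase {
  unsigned getStackAlignment() const { return 16; }
};

struct MachineFunctionStub {
  AArch64LikeFrameLowering FL;
  const TargetFrameLoweringBase *getGenericFrameLowering() const { return &FL; }
};

// The one place that performs the downcast.
const AArch64LikeFrameLowering &getFrameLowering(const MachineFunctionStub &MF) {
  return static_cast<const AArch64LikeFrameLowering &>(*MF.getGenericFrameLowering());
}

int main() {
  MachineFunctionStub MF;
  // Callers now reach derived-class queries such as getStackAlignment() directly.
  assert(getFrameLowering(MF).getStackAlignment() == 16);
}
```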
@@ -376,8 +373,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); const AArch64InstrInfo *TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); - const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const AArch64FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned FrameReg; @@ -415,7 +411,7 @@ namespace llvm { unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); switch (RC->getID()) { default: diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index b9c53998752a..f40293021d74 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -16,11 +16,6 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - -AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} - SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, @@ -37,8 +32,8 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { const AArch64TargetLowering &TLI = *STI.getTargetLowering(); - EVT IntPtr = TLI.getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 11932d2b1c22..97421b45b122 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -20,8 +20,6 @@ namespace llvm { class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit AArch64SelectionDAGInfo(const DataLayout *DL); - ~AArch64SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 554826b1e08a..486efd6ce3a2 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -49,15 +49,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(FS)), - TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} + InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), + TLInfo(TM, *this) {} /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. 
unsigned char AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); // MachO large model always goes via a GOT, simply to get a single 8-byte // absolute relocation on all global addresses. @@ -66,8 +66,7 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // The small code mode's direct accesses use ADRP, which cannot necessarily // produce the value 0 (if the code is above 4GB). - if (TM.getCodeModel() == CodeModel::Small && - GV->isWeakForLinker() && isDecl) { + if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage()) { // In PIC mode use the GOT, but in absolute mode use a constant pool load. if (TM.getRelocationModel() == Reloc::Static) return AArch64II::MO_CONSTPOOL; @@ -85,8 +84,7 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // defined could end up in unexpected places. Use a GOT. if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { if (isTargetMachO()) - return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT - : AArch64II::MO_NO_FLAG; + return isDef ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; else // No need to go through the GOT for local symbols on ELF. return GV->hasLocalLinkage() ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index fc91c94351cc..e085cca35f1c 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -181,8 +181,8 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); @@ -265,7 +265,7 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (Index != -1U) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -289,7 +289,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. 
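In ClassifyGlobalReference above, the MachO decision is now keyed on GlobalValue::isStrongDefinitionForLinker(): a reference is direct only when the global is a strong definition in this module, and everything else, declarations and weak definitions alike, goes through the GOT. A hedged sketch of that predicate with stub types in place of GlobalValue:

```cpp
// Stub types only; mirrors "strong definition means not a declaration and not weak".
#include <cassert>

struct GlobalStub {
  bool IsDeclaration;
  bool IsWeak;
  bool isStrongDefinitionForLinker() const { return !IsDeclaration && !IsWeak; }
};

enum class Flag { NoFlag, GOT };

Flag classifyMachOReference(const GlobalStub &GV) {
  return GV.isStrongDefinitionForLinker() ? Flag::NoFlag : Flag::GOT;
}

int main() {
  assert(classifyMachOReference({false, false}) == Flag::NoFlag); // strong def: direct
  assert(classifyMachOReference({true, false}) == Flag::GOT);     // declaration: GOT
  assert(classifyMachOReference({false, true}) == Flag::GOT);     // weak def: GOT
}
```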
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -364,8 +364,8 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } }; - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); + EVT SelCondTy = TLI->getValueType(DL, CondTy); + EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), @@ -380,7 +380,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isIntegerTy(64)) { @@ -416,7 +416,7 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); // ldN/stN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 4dabdadd8eeb..444d3ccc15e1 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -31,7 +31,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { typedef TargetTransformInfo TTI; friend BaseT; - const AArch64TargetMachine *TM; const AArch64Subtarget *ST; const AArch64TargetLowering *TLI; @@ -50,30 +49,15 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { public: explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F) - : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)), + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
AArch64TTIImpl(const AArch64TTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST), - TLI(Arg.TLI) {} + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} AArch64TTIImpl(AArch64TTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)), - ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - TM = RHS.TM; - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - TM = std::move(RHS.TM); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 359c2e734e21..db9fb0e775df 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -228,7 +228,7 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, } static MCSymbolizer * -createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, +createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo) { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index b5b1d1f9e19c..16d53569b231 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -199,7 +199,7 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, MCTargetStreamer * createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new AArch64TargetELFStreamer(S); return nullptr; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 099d1b01c339..9f7bed0d3b12 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -42,16 +42,13 @@ static MCInstrInfo *createAArch64MCInstrInfo() { static MCSubtargetInfo * createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - if (CPU.empty()) CPU = "generic"; - InitAArch64MCSubtargetInfo(X, TT, CPU, FS); - return X; + return createAArch64MCSubtargetInfoImpl(TT, CPU, FS); } -static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { +static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitAArch64MCRegisterInfo(X, AArch64::LR); return X; @@ -75,11 +72,11 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createAArch64MCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) 
&& + assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) && "Only expect Darwin and ELF targets"); if (CM == CodeModel::Default) @@ -94,7 +91,7 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, "Only small and large code models are allowed on AArch64"); // AArch64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) + if (TT.isOSDarwin()) RM = Reloc::PIC_; // On ELF platforms the default static relocation model has a smart enough // linker to cope with referencing external symbols defined in a shared diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 569ad3844b25..ef8ef6268548 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -98,6 +98,16 @@ def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", "true", "Enable SI load/store optimizer pass">; +// Performance debugging feature. Allow using DS instruction immediate +// offsets even if the base pointer can't be proven to be base. On SI, +// base pointer values that won't give the same result as a 16-bit add +// are not safe to fold, but this will override the conservative test +// for the base pointer. +def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding", + "EnableUnsafeDSOffsetFolding", + "true", + "Force using DS instruction immediate offsets on SI">; + def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 0b426bc63dd5..ad267d350850 100644 --- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -22,7 +22,6 @@ using namespace llvm; namespace { class AMDGPUAlwaysInline : public ModulePass { - static char ID; public: @@ -36,10 +35,9 @@ public: char AMDGPUAlwaysInline::ID = 0; bool AMDGPUAlwaysInline::runOnModule(Module &M) { + std::vector<Function *> FuncsToClone; - std::vector<Function*> FuncsToClone; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; + for (Function &F : M) { if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && !F.hasFnAttribute(Attribute::NoInline)) FuncsToClone.push_back(&F); @@ -49,12 +47,11 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) { ValueToValueMapTy VMap; Function *NewFunc = CloneFunction(F, VMap, false); NewFunc->setLinkage(GlobalValue::InternalLinkage); - F->getParent()->getFunctionList().push_back(NewFunc); + M.getFunctionList().push_back(NewFunc); F->replaceAllUsesWith(NewFunc); } - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; + for (Function &F : M) { if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { F.addFnAttr(Attribute::AlwaysInline); } diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index df4461eac4db..37b77d778d9f 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -110,8 +110,11 @@ private: SDValue &Offset, SDValue &GLC) const; SDNode *SelectAddrSpaceCast(SDNode *N); bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3Mods0Clamp(SDValue In, 
SDValue &Src, SDValue &SrcMods, SDValue &Omod) const; @@ -859,7 +862,8 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, (OffsetBits == 8 && !isUInt<8>(Offset))) return false; - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || + Subtarget->unsafeDSOffsetFoldingEnabled()) return true; // On Southern Islands instruction with a negative base value and an offset @@ -1316,6 +1320,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + bool Res = SelectVOP3Mods(In, Src, SrcMods); + return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { @@ -1327,6 +1337,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, + SDValue &SrcMods, SDValue &Clamp, + SDValue &Omod) const { + bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); + + return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && + cast<ConstantSDNode>(Clamp)->isNullValue() && + cast<ConstantSDNode>(Omod)->isNullValue(); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Omod) const { @@ -1351,18 +1371,14 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { do { IsModified = false; // Go over all selected nodes and try to fold them a bit more - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - - SDNode *Node = I; - - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + for (SDNode &Node : CurDAG->allnodes()) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); if (!MachineNode) continue; SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); - if (ResNode != Node) { - ReplaceUses(Node, ResNode); + if (ResNode != &Node) { + ReplaceUses(&Node, ResNode); IsModified = true; } } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d56838ec2019..3a65f3b56146 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -406,6 +406,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SELECT_CC); @@ -444,7 +445,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, // Target Information //===----------------------------------------------------------------------===// -MVT AMDGPUTargetLowering::getVectorIdxTy() const { +MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { return MVT::i32; } @@ -545,9 +546,8 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { } bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { - const DataLayout *DL = getDataLayout(); - unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType()); - unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType()); + unsigned SrcSize = Src->getScalarSizeInBits(); + unsigned DestSize = Dest->getScalarSizeInBits(); return SrcSize == 
32 && DestSize == 64; } @@ -697,7 +697,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, const SDValue &InitPtr, SDValue Chain, SelectionDAG &DAG) const { - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); SDLoc DL(InitPtr); Type *InitTy = Init->getType(); @@ -705,20 +705,20 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, EVT VT = EVT::getEVT(InitTy); PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS); return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(InitTy)); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, TD.getPrefTypeAlignment(InitTy)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) { EVT VT = EVT::getEVT(CFP->getType()); PointerType *PtrTy = PointerType::get(CFP->getType(), 0); return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(CFP->getType())); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, TD.getPrefTypeAlignment(CFP->getType())); } if (StructType *ST = dyn_cast<StructType>(InitTy)) { - const StructLayout *SL = TD->getStructLayout(ST); + const StructLayout *SL = TD.getStructLayout(ST); EVT PtrVT = InitPtr.getValueType(); SmallVector<SDValue, 8> Chains; @@ -745,7 +745,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, else llvm_unreachable("Unexpected type"); - unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType()); + unsigned EltSize = TD.getTypeAllocSize(SeqTy->getElementType()); SmallVector<SDValue, 8> Chains; for (unsigned i = 0; i < NumElements; ++i) { SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT); @@ -762,8 +762,8 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, EVT VT = EVT::getEVT(InitTy); PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS); return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(InitTy)); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, TD.getPrefTypeAlignment(InitTy)); } Init->dump(); @@ -785,7 +785,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, SDValue Op, SelectionDAG &DAG) const { - const DataLayout *TD = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = G->getGlobal(); @@ -801,7 +801,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, unsigned Offset; if (MFI->LocalMemoryObjects.count(GV) == 0) { - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); Offset = MFI->LDSSize; MFI->LocalMemoryObjects[GV] = Offset; // XXX: Account for alignment? 
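Just above, the LOCAL_ADDRESS path hands each LDS global a byte offset taken from a running counter in the machine-function info, with the size now computed through DAG.getDataLayout(). A stripped-down, standalone model of that placement scheme; the growth of the running total sits in elided context, so treat that part as an assumption, and alignment is deliberately ignored, which is exactly what the XXX comment above flags:

    #include <cstdint>
    #include <map>

    // Toy LDS placement: an object is placed at the current end of the block
    // and the block grows by the object's alloc size. No alignment handling.
    struct LDSAllocatorSketch {
      uint64_t LDSSize = 0;
      std::map<const void *, uint64_t> Offsets; // keyed by the global object

      uint64_t assign(const void *GV, uint64_t AllocSize) {
        auto It = Offsets.find(GV);
        if (It != Offsets.end())
          return It->second;        // already placed once
        uint64_t Offset = LDSSize;  // place at the current end
        Offsets[GV] = Offset;
        LDSSize += AllocSize;       // assumed growth of the running total
        return Offset;
      }
    };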
@@ -811,16 +811,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, } return DAG.getConstant(Offset, SDLoc(Op), - getPointerTy(AMDGPUAS::LOCAL_ADDRESS)); + getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS)); } case AMDGPUAS::CONSTANT_ADDRESS: { MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); Type *EltType = GV->getType()->getElementType(); - unsigned Size = TD->getTypeAllocSize(EltType); - unsigned Alignment = TD->getPrefTypeAlignment(EltType); + unsigned Size = DL.getTypeAllocSize(EltType); + unsigned Alignment = DL.getPrefTypeAlignment(EltType); - MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS); - MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); + MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS); + MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); int FI = FrameInfo->CreateStackObject(Size, Alignment, false); SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT); @@ -1653,7 +1653,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool // fb = fabs(fb); fb = DAG.getNode(ISD::FABS, DL, FltVT, fb); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT); + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // int cv = fr >= fb; SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE); @@ -1960,7 +1960,8 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); @@ -2020,7 +2021,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32); @@ -2051,7 +2053,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51"); SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); @@ -2081,7 +2084,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32); SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); @@ -2100,8 +2104,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const const SDValue One = DAG.getConstant(1, SL, MVT::i32); const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32); const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32); - + EVT SetCCVT = + 
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X); @@ -2172,7 +2176,8 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); @@ -2411,6 +2416,33 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, SN->getBasePtr(), SN->getMemOperand()); } +SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + if (N->getValueType(0) != MVT::i64) + return SDValue(); + + // i64 (shl x, 32) -> (build_pair 0, x) + + // Doing this with moves theoretically helps MI optimizations that understand + // copies. 2 v_mov_b32_e32 will have the same code size / cycle count as + // v_lshl_b64. In the SALU case, I think this is slightly worse since it + // doubles the code size and I'm unsure about cycle count. + const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!RHS || RHS->getZExtValue() != 32) + return SDValue(); + + SDValue LHS = N->getOperand(0); + + SDLoc SL(N); + SelectionDAG &DAG = DCI.DAG; + + // Extract low 32-bits. + SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); + + const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo); +} + SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); @@ -2448,17 +2480,24 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, SDLoc DL(N); switch(N->getOpcode()) { - default: break; - case ISD::MUL: - return performMulCombine(N, DCI); - case AMDGPUISD::MUL_I24: - case AMDGPUISD::MUL_U24: { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - simplifyI24(N0, DCI); - simplifyI24(N1, DCI); - return SDValue(); - } + default: + break; + case ISD::SHL: { + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) + break; + + return performShlCombine(N, DCI); + } + case ISD::MUL: + return performMulCombine(N, DCI); + case AMDGPUISD::MUL_I24: + case AMDGPUISD::MUL_U24: { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + simplifyI24(N0, DCI); + simplifyI24(N1, DCI); + return SDValue(); + } case ISD::SELECT: { SDValue Cond = N->getOperand(0); if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) { @@ -2644,6 +2683,18 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getRegister(VirtualRegister, VT); } +uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( + const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const { + uint64_t ArgOffset = MFI->ABIArgOffset; + switch (Param) { + case GRID_DIM: + return ArgOffset; + case GRID_OFFSET: + return ArgOffset + 4; + } + llvm_unreachable("unexpected implicit parameter type"); +} + #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index fbb7d3c88437..478b2035fd75 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -65,6 
+65,7 @@ private: SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; protected: @@ -123,7 +124,7 @@ public: bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; - MVT getVectorIdxTy() const override; + MVT getVectorIdxTy(const DataLayout &) const override; bool isSelectSupported(SelectSupportKind) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; @@ -207,6 +208,16 @@ public: virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const; + + enum ImplicitParameter { + GRID_DIM, + GRID_OFFSET + }; + + /// \brief Helper function that returns the byte offset of the given + /// type of implicit parameter. + unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, + const ImplicitParameter Param) const; }; namespace AMDGPUISD { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 0779d1d786b2..bd5abc4f546e 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -69,6 +69,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), + EnableUnsafeDSOffsetFolding(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 30f50eb1d2f3..90831bfb4458 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ private: bool EnablePromoteAlloca; bool EnableIfCvt; bool EnableLoadStoreOpt; + bool EnableUnsafeDSOffsetFolding; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -222,6 +223,10 @@ public: return EnableLoadStoreOpt; } + bool unsafeDSOffsetFoldingEnabled() const { + return EnableUnsafeDSOffsetFolding; + } + unsigned getWavefrontSize() const { return WavefrontSize; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a9a911a8efed..2297b52b423c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -156,8 +156,10 @@ public: } // End of anonymous namespace TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo( + AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); + }); } void AMDGPUPassConfig::addIRPasses() { @@ -269,6 +271,7 @@ void GCNPassConfig::addPreRegAlloc() { // also need extra copies to the address operand to be eliminated. 
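The performShlCombine introduced above rests on a simple bit-level identity: viewing an i64 as two 32-bit halves, a shift left by exactly 32 moves the low half into the high half and zeroes the low half, which is precisely what (build_pair 0, (trunc x)) builds. Note the combine only fires once legalization is done, per the getDAGCombineLevel() guard in PerformDAGCombine. A standalone check of the identity in plain C++, independent of the SelectionDAG machinery:

    #include <cassert>
    #include <cstdint>

    // i64 (shl x, 32) ...
    static uint64_t shlBy32(uint64_t X) { return X << 32; }

    // ... equals build_pair(lo = 0, hi = trunc-to-i32 of x).
    static uint64_t buildPairZeroLo(uint64_t X) {
      uint32_t Lo = static_cast<uint32_t>(X);    // ISD::TRUNCATE
      return static_cast<uint64_t>(Lo) << 32;    // hi = Lo, lo = 0
    }

    int main() {
      for (uint64_t X : {0ull, 1ull, 0x00000001FFFFFFFFull, ~0ull})
        assert(shlBy32(X) == buildPairZeroLo(X));
    }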
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); + insertPass(&MachineSchedulerID, &RegisterCoalescerID); } addPass(createSIShrinkInstructionsPass(), false); addPass(createSIFixSGPRLiveRangesPass(), false); @@ -280,10 +283,10 @@ void GCNPassConfig::addPostRegAlloc() { } void GCNPassConfig::addPreSched2() { - addPass(createSIInsertWaits(*TM), false); } void GCNPassConfig::addPreEmitPass() { + addPass(createSIInsertWaits(*TM), false); addPass(createSILowerControlFlowPass(*TM), false); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 791c84e6f28b..dee0a69d1e68 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -37,8 +37,9 @@ class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> { const AMDGPUTargetLowering *getTLI() const { return TLI; } public: - explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL) + : BaseT(TM, DL), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg) @@ -46,18 +47,6 @@ public: AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } bool hasBranchDivergence() { return true; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 7172e4bb9335..c709741f3777 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -44,7 +44,7 @@ static MCInstrInfo *createAMDGPUMCInstrInfo() { return X; } -static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitAMDGPUMCRegisterInfo(X, 0); return X; @@ -52,14 +52,13 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo * X = new MCSubtargetInfo(); - InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { +static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); X->initMCCodeGenInfo(RM, CM, OL); return X; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 8357b6d9d0ed..4e4d554f0ee7 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -815,8 +815,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case 
Intrinsic::r600_read_local_size_z: return LowerImplicitParameter(DAG, VT, DL, 8); - case Intrinsic::AMDGPU_read_workdim: - return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4); + case Intrinsic::AMDGPU_read_workdim: { + uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM); + return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); + } case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, @@ -897,8 +899,9 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) { - Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, - DAG.getConstant(i, DL, getVectorIdxTy()))); + Args.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, + DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout())))); } return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); @@ -1459,22 +1462,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; - SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); - if (Ret.getNode()) { - SDValue Ops[2] = { - Ret, - Chain - }; - return DAG.getMergeValues(Ops, DL); - } + if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG)) + return Ret; // Lower loads constant address space global variable loads if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && isa<GlobalVariable>(GetUnderlyingObject( - LoadNode->getMemOperand()->getValue(), *getDataLayout()))) { + LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) { - SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL, - getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); + SDValue Ptr = DAG.getZExtOrTrunc( + LoadNode->getBasePtr(), DL, + getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS)); Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(), @@ -1702,7 +1700,8 @@ SDValue R600TargetLowering::LowerFormalArguments( return Chain; } -EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index c06d3c4fd309..4dbac97af2a1 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -38,7 +38,9 @@ public: const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; - EVT getSetCCResultType(LLVMContext &, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const override; + private: unsigned Gen; /// Each OpenCL kernel has nine implicit parameters that are stored in the diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index d14e37a64612..c2887255cc11 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -126,11 +126,42 @@ static bool updateOperand(FoldCandidate &Fold, return false; } +static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList, + const MachineInstr *MI) { + for (auto Candidate : FoldList) { + if (Candidate.UseMI == MI) + return true; + } + return false; +} + static bool tryAddToFoldList(std::vector<FoldCandidate> 
&FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII) { if (!TII->isOperandLegal(MI, OpNo, OpToFold)) { + + // Special case for v_mac_f32_e64 if we are trying to fold into src2 + unsigned Opc = MI->getOpcode(); + if (Opc == AMDGPU::V_MAC_F32_e64 && + (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) { + // Check if changing this to a v_mad_f32 instruction will allow us to + // fold the operand. + MI->setDesc(TII->get(AMDGPU::V_MAD_F32)); + bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII); + if (FoldAsMAD) { + MI->untieRegOperand(OpNo); + return true; + } + MI->setDesc(TII->get(Opc)); + } + + // If we are already folding into another operand of MI, then + // we can't commute the instruction, otherwise we risk making the + // other fold illegal. + if (isUseMIInFoldList(FoldList, MI)) + return false; + // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. unsigned CommuteIdx0; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index ead1a3743473..dd818a9ba746 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -254,8 +254,9 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, return false; } -bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, unsigned AS) const { +bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; @@ -416,7 +417,7 @@ static EVT toIntegerVT(EVT VT) { SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { - const DataLayout *DL = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo()); @@ -425,16 +426,16 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, DAG.getConstant(Offset, SL, PtrVT)); - SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); + SDValue PtrOffset = DAG.getUNDEF(PtrVT); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - unsigned Align = DL->getABITypeAlignment(Ty); + unsigned Align = DL.getABITypeAlignment(Ty); if (VT != MemVT && VT.isFloatingPoint()) { // Do an integer load and convert. @@ -451,7 +452,12 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, true, // isNonTemporal true, // isInvariant Align); // Alignment - return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load); + SDValue Ops[] = { + DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load), + Load.getValue(1) + }; + + return DAG.getMergeValues(Ops, SL); } ISD::LoadExtType ExtTy = Signed ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; @@ -569,6 +575,8 @@ SDValue SITargetLowering::LowerFormalArguments( AnalyzeFormalArguments(CCInfo, Splits); + SmallVector<SDValue, 16> Chains; + for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { const ISD::InputArg &Arg = Ins[i]; @@ -587,8 +595,9 @@ SDValue SITargetLowering::LowerFormalArguments( VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), + SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt()); + Chains.push_back(Arg.getValue(1)); const PointerType *ParamTy = dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); @@ -614,7 +623,8 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, &AMDGPU::SReg_64RegClass); Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass); - InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); + InVals.push_back(Copy); continue; } @@ -634,7 +644,9 @@ SDValue SITargetLowering::LowerFormalArguments( for (unsigned j = 1; j != NumElements; ++j) { Reg = ArgLocs[ArgIdx++].getLocReg(); Reg = MF.addLiveIn(Reg, RC); - Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + + SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); + Regs.push_back(Copy); } // Fill up the missing vector elements @@ -653,7 +665,11 @@ SDValue SITargetLowering::LowerFormalArguments( AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs())); Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx); } - return Chain; + + if (Chains.empty()) + return Chain; + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( @@ -695,14 +711,15 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const { return true; } -EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const { +EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const { if (!VT.isVector()) { return MVT::i1; } return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); } -MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { +MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT) const { return MVT::i32; } @@ -888,7 +905,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDLoc DL(GSD); const GlobalValue *GV = GSD->getGlobal(); - MVT PtrVT = getPointerTy(GSD->getAddressSpace()); + MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace()); SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); @@ -926,6 +943,7 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + auto MFI = MF.getInfo<SIMachineFunctionInfo>(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); @@ -964,8 +982,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::AMDGPU_read_workdim: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset, - false); + getImplicitParameterOffset(MFI, GRID_DIM), false); case Intrinsic::r600_read_tgid_x: return 
CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, @@ -1213,7 +1230,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32); SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); @@ -1411,7 +1429,7 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, unsigned AS = Load->getAddressSpace(); unsigned Align = Load->getAlignment(); Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); // Don't try to replace the load if we have to expand it due to alignment // problems. Otherwise we will end up scalarizing the load, and trying to @@ -2212,9 +2230,8 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, std::pair<unsigned, const TargetRegisterClass *> SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint_, + StringRef Constraint, MVT VT) const { - StringRef Constraint(Constraint_); if (Constraint == "r") { switch(VT.SimpleTy) { default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index a956b013bdb1..635b4edc89de 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -62,8 +62,8 @@ public: bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/, EVT /*VT*/) const override; - bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty, unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -90,8 +90,9 @@ public: MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const override; bool enableAggressiveFMAFusion(EVT VT) const override; - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - MVT getScalarShiftAmountTy(EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; @@ -114,9 +115,9 @@ public: SDLoc DL, SDValue Ptr) const; - std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, - const std::string &Constraint, MVT VT) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const; }; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index eb96bd0227b2..18910615bebe 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -227,9 +227,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, uint8_t Offset0 = Offset0Imm->getImm(); uint8_t Offset1 = Offset1Imm->getImm(); - assert(Offset1 > Offset0); - if (Offset1 - 
Offset0 == 1) { + if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { // Each of these offsets is in element sized units, so we need to convert // to bytes of the individual reads. @@ -924,7 +923,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, return false; unsigned Opc = UseMI->getOpcode(); - if (Opc == AMDGPU::V_MAD_F32) { + if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { // Don't fold if we are using source modifiers. The new VOP2 instructions // don't have them. if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || @@ -963,9 +962,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); unsigned Src1Reg = Src1->getReg(); @@ -980,6 +979,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, Src1->setSubReg(Src2SubReg); Src1->setIsKill(Src2->isKill()); + if (Opc == AMDGPU::V_MAC_F32_e64) { + UseMI->untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + } + + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, + AMDGPU::OpName::src2)); + // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); removeModOperands(*UseMI); @@ -1010,11 +1017,17 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); + if (Opc == AMDGPU::V_MAC_F32_e64) { + UseMI->untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + } + + // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); // These come before src2. 
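Several of the hunks around here (the fold-list special case, the FoldImmediate changes, and the convertToThreeAddress hook coming up) lean on the relationship between the two opcodes: v_mac_f32 is the two-address form whose destination doubles as the tied src2 accumulator, while v_mad_f32 takes an independent third source. A per-lane scalar model of why rewriting one as the other preserves the value (rounding-mode subtleties ignored; this is a sketch, not a statement of the exact hardware semantics):

    // Per-lane view:
    //   v_mac_f32 d, a, b      -- d = a * b + d   (dst tied to src2)
    //   v_mad_f32 d, a, b, c   -- d = a * b + c   (free third operand)
    static float macSketch(float D, float A, float B) { return A * B + D; }
    static float madSketch(float A, float B, float C) { return A * B + C; }
    // macSketch(D, A, B) == madSketch(A, B, D), so switching the opcode and
    // untying src2 keeps the computed value while freeing src2 for folding.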
@@ -1126,6 +1139,38 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, return false; } +MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, + MachineBasicBlock::iterator &MI, + LiveVariables *LV) const { + + switch (MI->getOpcode()) { + default: return nullptr; + case AMDGPU::V_MAC_F32_e64: break; + case AMDGPU::V_MAC_F32_e32: { + const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); + if (Src0->isImm() && !isInlineConstant(*Src0, 4)) + return nullptr; + break; + } + } + + const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst); + const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); + const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); + const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); + + return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) + .addOperand(*Dst) + .addImm(0) // Src0 mods + .addOperand(*Src0) + .addImm(0) // Src1 mods + .addOperand(*Src1) + .addImm(0) // Src mods + .addOperand(*Src2) + .addImm(0) // clamp + .addImm(0); // omod +} + bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { int64_t SVal = Imm.getSExtValue(); if (SVal >= -16 && SVal <= 64) @@ -1625,7 +1670,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (MO->isReg()) { assert(DefinedRC); - const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg()); + const TargetRegisterClass *RC = + TargetRegisterInfo::isVirtualRegister(MO->getReg()) ? + MRI.getRegClass(MO->getReg()) : + RI.getPhysRegClass(MO->getReg()); // In order to be legal, the common sub-class must be equal to the // class of the current operand. For example: diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 0382272068d2..015ea12d4598 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -144,6 +144,10 @@ public: unsigned getMachineCSELookAheadLimit() const override { return 500; } + MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, + MachineBasicBlock::iterator &MI, + LiveVariables *LV) const override; + bool isSALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SALU; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index fcb58d5da3b0..b39a78714640 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -529,9 +529,11 @@ def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; +def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">; def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">; def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">; def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; +def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">; //===----------------------------------------------------------------------===// // SI assembler operands @@ -1113,6 +1115,13 @@ def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> { field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2); field string Asm = "$dst, $src0, $vsrc1, $src2"; } +def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); + let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3, + HasModifiers>.ret; + let Asm32 = 
getAsm32<2>.ret; + let Asm64 = getAsm64<2, HasModifiers>.ret; +} def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 8c8d836776db..1ee63c675822 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1488,7 +1488,10 @@ defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>; defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>; defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>; -defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>; +let Constraints = "$dst = $src2", DisableEncoding="$src2", + isConvertibleToThreeAddress = 1 in { +defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>; +} } // End isCommutable = 1 defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">; @@ -2206,6 +2209,15 @@ def : Pat < (V_CNDMASK_B32_e64 $src2, $src1, $src0) >; +// Pattern for V_MAC_F32 +def : Pat < + (fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3NoMods f32:$src1, i32:$src1_modifiers), + (VOP3NoMods f32:$src2, i32:$src2_modifiers)), + (V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1, + $src2_modifiers, $src2, $clamp, $omod) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 9b1d256dc5a8..1bdb1f0ee9f9 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -214,12 +214,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( // cases, like vectors of pointers. const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr); - unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg(); - unsigned DestReg1 - = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg(); + const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst); + const MachineOperand *Dest1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst); unsigned Offset0 - = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff; + = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff; unsigned Offset1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff; @@ -258,20 +257,43 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1; unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3; - updateRegDefsUses(DestReg0, DestReg, SubRegIdx0); - updateRegDefsUses(DestReg1, DestReg, SubRegIdx1); - LIS->RemoveMachineInstrFromMaps(I); - // Replacing Paired in the maps with Read2 allows us to avoid updating the - // live range for the m0 register. - LIS->ReplaceMachineInstrInMaps(Paired, Read2); + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + + // Copy to the old destination registers. + MachineInstr *Copy0 = BuildMI(*MBB, I, DL, CopyDesc) + .addOperand(*Dest0) // Copy to same destination including flags and sub reg. 
+ .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, I, DL, CopyDesc) + .addOperand(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + LIS->InsertMachineInstrInMaps(Read2); + + // repairLiveintervalsInRange() doesn't handle physical register, so we have + // to update the M0 range manually. + SlotIndex PairedIndex = LIS->getInstructionIndex(Paired); + LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI)); + LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex); + bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot(); + + // The new write to the original destination register is now the copy. Steal + // the old SlotIndex. + LIS->ReplaceMachineInstrInMaps(I, Copy0); + LIS->ReplaceMachineInstrInMaps(Paired, Copy1); + I->eraseFromParent(); Paired->eraseFromParent(); LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg()); LIS->shrinkToUses(&AddrRegLI); - LIS->getInterval(DestReg); // Create new LI + LIS->createAndComputeVirtRegInterval(DestReg); + + if (UpdateM0Range) { + SlotIndex Read2Index = LIS->getInstructionIndex(Read2); + M0Segment->end = Read2Index.getRegSlot(); + } DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); return Read2.getInstr(); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 587ea63d6796..d23b92edef33 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -53,7 +53,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); LaneVGPRs[LaneVGPRIdx] = LaneVGPR; - MRI.setPhysRegUsed(LaneVGPR); // Add this register as live-in to all blocks to avoid machine verifer // complaining about use of an undefined physical register. diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp index 0a7f684552f0..b086d2ed6652 100644 --- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp @@ -91,7 +91,6 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { if (ScratchOffsetReg != AMDGPU::NoRegister) { // Found an SGPR to use - MRI.setPhysRegUsed(ScratchOffsetReg); BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg) .addReg(ScratchOffsetPreloadReg); } else { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index db2ff0b1f952..ce4acafac9fa 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -499,7 +499,7 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (!MRI.isPhysRegUsed(*I)) + if (MRI.reg_nodbg_empty(*I)) return *I; } return AMDGPU::NoRegister; diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 51e72cdb5f9e..5d00bdd6a9bb 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -94,8 +94,20 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, // is vcc. We should handle this the same way we handle vopc, by addding // a register allocation hint pre-regalloc and then do the shrining // post-regalloc. 
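For context on the src2 special-casing above: in the 32-bit VOP2 encoding the select condition is not an explicit operand at all; v_cndmask_b32_e32 implicitly reads VCC, while the e64 form can name an arbitrary condition register. That is why the shrinking pass, in the hunk that follows, hints the virtual condition register toward VCC rather than shrinking the instruction on the spot. A per-lane scalar model of the select itself, as a sketch that ignores the wave-wide mask aspect:

    // Per-lane view of v_cndmask_b32: take src1 when the lane's condition bit
    // is set, src0 otherwise. In the e32 encoding that bit comes from VCC.
    static float cndmaskSketch(bool CondBit, float S0, float S1) {
      return CondBit ? S1 : S0;
    }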
- if (Src2) - return false; + if (Src2) { + switch (MI.getOpcode()) { + default: return false; + + case AMDGPU::V_MAC_F32_e64: + if (!isVGPR(Src2, TRI, MRI) || + TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) + return false; + break; + + case AMDGPU::V_CNDMASK_B32_e64: + break; + } + } const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); const MachineOperand *Src1Mod = @@ -149,7 +161,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, return; // Try to fold Src0 - if (Src0.isReg()) { + if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) { unsigned Reg = Src0.getReg(); MachineInstr *Def = MRI.getUniqueVRegDef(Reg); if (Def && Def->isMoveImmediate()) { @@ -243,6 +255,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { continue; } + if (Op32 == AMDGPU::V_CNDMASK_B32_e32) { + // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC + // instructions. + const MachineOperand *Src2 = + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (!Src2->isReg()) + continue; + unsigned SReg = Src2->getReg(); + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC); + continue; + } + if (SReg != AMDGPU::VCC) + continue; + } + // We can shrink this instruction DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';); @@ -259,6 +287,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (Src1) Inst32.addOperand(*Src1); + const MachineOperand *Src2 = + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2) + Inst32.addOperand(*Src2); + ++NumInstructionsShrunk; MI.eraseFromParent(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 96b4742da2bb..ef609a66d032 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -150,6 +150,10 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", "NaCl trap">; +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b1a11d626bda..9f43e732bd73 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1230,8 +1230,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { Reloc::Model RM = MF.getTarget().getRelocationModel(); if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { - assert(getSubtarget().getTargetTriple().getObjectFormat() == - Triple::MachO && + assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && "LOAD_STACK_GUARD currently supported only for MachO."); expandLoadStackGuard(MI, RM); MI->getParent()->erase(MI); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3f79a9b53d70..e7d5be7753e4 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -127,7 +127,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); @@ -194,7 +194,7 @@ unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); switch (RC->getID()) { default: @@ -302,7 +302,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -333,6 +333,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or @@ -347,7 +348,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (TFI->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -357,9 +358,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = - MF.getSubtarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -378,7 +379,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) return getFramePointerReg(STI); @@ -517,7 +518,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. 
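The needsStackRealignment change above keeps the predicate itself untouched: realign when the largest stack-object alignment exceeds what the frame lowering guarantees for the stack pointer, or when the function carries an explicit stack-alignment attribute. A tiny worked example (the 8-byte figure is only an illustrative stack alignment, not something taken from this patch):

    // Toy restatement of the predicate computed above.
    static bool needsRealignmentSketch(unsigned StackAlign, unsigned MaxObjAlign,
                                       bool HasStackAlignAttr) {
      return MaxObjAlign > StackAlign || HasStackAlignAttr;
    }
    // needsRealignmentSketch(/*StackAlign=*/8, /*MaxObjAlign=*/32, false) is true:
    // a 32-byte aligned stack object forces dynamic realignment of an 8-byte stack.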
MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -694,8 +695,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const ARMFrameLowering *TFI = getFrameLowering(MF); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 7dd21ecbe91b..27cf06b995a0 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -142,6 +142,9 @@ def CC_ARM_AAPCS : CallingConv<[ // Handles byval parameters. CCIfByVal<CCPassByVal<4, 4>>, + // The 'nest' parameter, if any, is passed in R12. + CCIfNest<CCAssignToReg<[R12]>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4175b4af86e6..fdd0763ea608 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -49,8 +49,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -extern cl::opt<bool> EnableARMLongCalls; - namespace { // All possible address modes, plus some. @@ -685,7 +683,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { } unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; @@ -732,7 +730,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) { } bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -786,12 +784,13 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return ARMComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return ARMComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. 
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return ARMComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -1365,7 +1364,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -1557,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -1750,7 +1749,7 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { } bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1790,7 +1789,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { - EVT FPVT = TLI.getValueType(I->getType(), true); + EVT FPVT = TLI.getValueType(DL, I->getType(), true); if (!FPVT.isSimple()) return false; MVT VT = FPVT.getSimpleVT(); @@ -2095,7 +2094,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -2122,7 +2121,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); MVT DestVT = VA.getValVT(); @@ -2173,7 +2172,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { unsigned ARMFastISel::getLibcallReg(const Twine &Name) { // Manually compute the global's type to avoid building it when unnecessary. Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0); - EVT LCREVT = TLI.getValueType(GVTy); + EVT LCREVT = TLI.getValueType(DL, GVTy); if (!LCREVT.isSimple()) return 0; GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, @@ -2246,19 +2245,19 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return false; unsigned CalleeReg = 0; - if (EnableARMLongCalls) { + if (Subtarget->genLongCalls()) { CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); if (CalleeReg == 0) return false; } // Issue the call. - unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); + unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls()); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); // BL / BLX don't take a predicate, but tBL / tBLX do. 
if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) + if (Subtarget->genLongCalls()) MIB.addReg(CalleeReg); else MIB.addExternalSymbol(TLI.getLibcallName(Call)); @@ -2380,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, bool UseReg = false; const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV || EnableARMLongCalls) UseReg = true; + if (!GV || Subtarget->genLongCalls()) UseReg = true; unsigned CalleeReg = 0; if (UseReg) { @@ -2576,8 +2575,8 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(Op->getType(), true); - DestVT = TLI.getValueType(I->getType(), true); + SrcVT = TLI.getValueType(DL, Op->getType(), true); + DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -2742,8 +2741,8 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) return false; @@ -2763,7 +2762,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, return false; // Only handle i32 now. - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (DestVT != MVT::i32) return false; @@ -3026,7 +3025,7 @@ bool ARMFastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i8: diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index a52e49780e27..6744000afe2b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -800,7 +800,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); @@ -1470,7 +1470,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // callee-saved vector registers after realigning the stack. The vst1 and vld1 // instructions take alignment hints that can improve performance. // -static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { +static void +checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); if (!SpillAlignedNEONRegs) return; @@ -1497,10 +1498,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { // callee-saved registers in order, but it can happen that there are holes in // the range. Registers above the hole will be spilled to the standard DPRCS // area. - MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) + if (!SavedRegs.test(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. 
@@ -1511,12 +1511,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); // A scratch register is required for the vst1 / vld1 instructions. - MF.getRegInfo().setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } -void -ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -1543,12 +1544,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MRI.setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getArgRegsSaveSize() > 0) - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1558,23 +1559,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) - MRI.setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } // See if we can spill vector registers to aligned stack. - checkNumAlignedDPRCS2Regs(MF); + checkNumAlignedDPRCS2Regs(MF, SavedRegs); // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MRI.setPhysRegUsed(RegInfo->getBaseRegister()); + SavedRegs.set(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. + // by scanning the callee-save registers to see if any is modified. const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MRI.isPhysRegUsed(Reg)) { + if (SavedRegs.test(Reg)) { Spilled = true; CanEliminateFrame = false; } @@ -1668,7 +1669,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
if (!LRSpilled && CS1Spilled) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl<unsigned>::iterator LRPos; LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), @@ -1681,7 +1682,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (hasFP(MF)) { - MRI.setPhysRegUsed(FramePtr); + SavedRegs.set(FramePtr); auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), FramePtr); if (FPPos != UnspilledCS1GPRs.end()) @@ -1700,7 +1701,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill high register if the function is thumb if (!AFI->isThumbFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; @@ -1708,7 +1709,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } @@ -1747,7 +1748,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MRI.setPhysRegUsed(Extras[i]); + SavedRegs.set(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot @@ -1761,7 +1762,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (ForceLRSpill) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index d763d17a506f..6fdc5eff5e47 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -54,8 +54,8 @@ public: unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 50afb192b331..b110628a0a86 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -533,7 +533,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -556,7 +557,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -702,7 +704,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -722,7 +725,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -900,7 +904,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), @@ -915,7 +920,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -964,7 +970,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -981,7 +988,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1215,7 +1223,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); 
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -1237,7 +1246,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1285,7 +1295,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -1314,7 +1325,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1343,7 +1355,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1438,7 +1451,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); @@ -2510,7 +2524,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (UseCP) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI->getPointerTy()); + TLI->getPointerTy(CurDAG->getDataLayout())); SDNode *ResNode; if (Subtarget->isThumb()) { @@ -2540,7 +2554,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); if (Subtarget->isThumb1Only()) { // Set the alignment of the frame object to 4, to avoid having to generate // more than one ADD diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4b2105b7442f..e335784f6d87 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -60,11 +60,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); -cl::opt<bool> -EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions"), - cl::init(false)); - static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), @@ -548,6 +543,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + // NEON does not have single instruction CTTZ for vectors. + setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); + + setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); + // NEON only has FMA instructions as of VFP4. 
if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); @@ -1149,8 +1165,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) return getPointerTy(); +EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { + if (!VT.isVector()) + return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } @@ -1429,7 +1447,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, PtrOff); return DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); @@ -1453,7 +1472,8 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, else { assert(NextVA.isMemLoc()); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, + getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, @@ -1526,7 +1546,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -1607,7 +1628,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); @@ -1628,12 +1650,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (Flags.getByValSize() > 4*offset) { + auto PtrVT = getPointerTy(DAG.getDataLayout()); unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, - StkPtrOff); + SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); - SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, MVT::i32); SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, @@ -1693,8 +1715,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isARMFunc = false; bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + auto PtrVt = getPointerTy(DAG.getDataLayout()); - if (EnableARMLongCalls) { + if (Subtarget->genLongCalls()) { assert((Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel() == Reloc::Static) && "long-calls with non-static relocation model!"); @@ -1709,12 +1732,11 @@ 
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); // Get the address of the callee into a register - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); @@ -1724,29 +1746,28 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); isDirect = true; - bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); - bool isStub = (isExt && Subtarget->isTargetMachO()) && + bool isDef = GV->isStrongDefinitionForLinker(); + bool isStub = (!isDef && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); + isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); - Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), - DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), - 0, ARMII::MO_NONLAZY)); - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + Callee = DAG.getNode( + ARMISD::WrapperPIC, dl, PtrVt, + DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && @@ -1754,20 +1775,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned TargetFlags = GV->hasDLLImportStorageClass() ? 
ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, - TargetFlags); + Callee = + DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), - Callee), MachinePointerInfo::getGOT(), - false, false, false, 0); + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + MachinePointerInfo::getGOT(), false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1781,22 +1802,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, dl, - getPointerTy(), Callee, PICLabel); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { unsigned OpFlags = 0; // On ELF targets for PIC code, direct calls should go through the PLT if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags); } } @@ -2433,7 +2452,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; SDLoc DL(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; @@ -2462,7 +2481,7 @@ SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { SDLoc dl(GA); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2508,7 +2527,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2574,7 +2593,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { @@ -2617,7 +2636,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2648,7 +2667,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); const ARMII::TOF TargetFlags = (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; SDLoc DL(Op); @@ -2672,7 +2691,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = @@ -2716,14 +2735,14 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; unsigned PCAdj = (RelocM != Reloc::PIC_) @@ -2820,7 +2839,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2850,7 +2869,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. 
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -2904,8 +2923,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, if (REnd != RBegin) ArgOffset = -4 * (ARM::R4 - RBegin); + auto PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); - SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); SmallVector<SDValue, 4> MemOps; const TargetRegisterClass *RC = @@ -2918,8 +2938,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(OrigArg, 4 * i), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, dl, getPointerTy())); + FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } if (!MemOps.empty()) @@ -3013,6 +3032,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); + auto PtrVT = getPointerTy(DAG.getDataLayout()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3035,7 +3055,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue2; if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -3122,7 +3142,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, VA.getLocMemOffset(), Flags.getByValSize()); - InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { unsigned FIOffset = VA.getLocMemOffset(); @@ -3130,7 +3150,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, FIOffset, true); // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); @@ -3855,7 +3875,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Index = Op.getOperand(2); SDLoc dl(Op); - EVT PTy = getPointerTy(); + EVT PTy = getPointerTy(DAG.getDataLayout()); JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); @@ -4102,8 +4122,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { unsigned Reg = StringSwitch<unsigned>(RegName) .Case("sp", ARM::SP) .Default(0); @@ -4163,7 +4183,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { SDValue Cvt; - if (TLI.isBigEndian() && SrcVT.isVector() && + if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && SrcVT.getVectorNumElements() > 1) Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), @@ -4283,8 +4303,82 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { - EVT VT = N->getValueType(0); SDLoc dl(N); + EVT VT = N->getValueType(0); + if (VT.isVector()) { + assert(ST->hasNEON()); + + // Compute the least significant set bit: LSB = X & -X + SDValue X = N->getOperand(0); + SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X); + SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX); + + EVT ElemTy = VT.getVectorElementType(); + + if (ElemTy == MVT::i8) { + // Compute with: cttz(x) = ctpop(lsb - 1) + SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(1, dl, ElemTy)); + SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); + return DAG.getNode(ISD::CTPOP, dl, VT, Bits); + } + + if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) && + (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) { + // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0 + unsigned NumBits = ElemTy.getSizeInBits(); + SDValue WidthMinus1 = + DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(NumBits - 1, dl, ElemTy)); + SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB); + return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ); + } + + // Compute with: cttz(x) = ctpop(lsb - 1) + + // Since we can only compute the number of bits in a byte with vcnt.8, we + // have to gather the result with pairwise addition (vpaddl) for i16, i32, + // and i64. + + // Compute LSB - 1. + SDValue Bits; + if (ElemTy == MVT::i64) { + // Load constant 0xffff'ffff'ffff'ffff to register. + SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(0x1eff, dl, MVT::i32)); + Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF); + } else { + SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(1, dl, ElemTy)); + Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); + } + + // Count #bits with vcnt.8. + EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; + SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits); + SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8); + + // Gather the #bits with vpaddl (pairwise add.) + EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; + SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt8); + if (ElemTy == MVT::i16) + return Cnt16; + + EVT VT32Bit = VT.is64BitVector() ? 
MVT::v2i32 : MVT::v4i32; + SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt16); + if (ElemTy == MVT::i32) + return Cnt32; + + assert(ElemTy == MVT::i64); + SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt32); + return Cnt64; + } if (!ST->hasV6T2Ops()) return SDValue(); @@ -4730,7 +4824,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, ImmMask <<= 1; } - if (DAG.getTargetLoweringInfo().isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // swap higher and lower 32 bit word Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); @@ -5868,7 +5962,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, if (BVN->getValueType(0) != MVT::v4i32 || BVN->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; unsigned HiElt = 1 - LoElt; ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); @@ -6013,7 +6107,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { SDNode *BVN = N->getOperand(0).getNode(); assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); - unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); } @@ -6342,18 +6436,19 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); // Create stack object for sret. - const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); - const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); + auto &DL = DAG.getDataLayout(); + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); + SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; ArgListEntry Entry; @@ -6373,7 +6468,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) @@ -6387,7 +6482,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(), false, false, false, 0); // Address of cos field. 
- SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo(), false, false, false, 0); @@ -6487,7 +6582,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); - case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); @@ -6845,9 +6941,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6935,9 +7031,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -7313,9 +7409,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb1) @@ -8001,7 +8097,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Build operand list. SmallVector<SDValue, 8> Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); // Input is the vector. Ops.push_back(Vec); @@ -8681,7 +8777,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); - if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + if (DCI.DAG.getDataLayout().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); return Result; @@ -9312,7 +9408,9 @@ static SDValue PerformSTORECombine(SDNode *N, SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = TLI.isBigEndian() ? 
(i+1) * SizeRatio - 1 : i * SizeRatio; + ShuffleVec[i] = DAG.getDataLayout().isBigEndian() + ? (i + 1) * SizeRatio - 1 + : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -9339,8 +9437,8 @@ static SDValue PerformSTORECombine(SDNode *N, assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL, - TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, + TLI.getPointerTy(DAG.getDataLayout())); SDValue BasePtr = St->getBasePtr(); // Perform one or more big stores into memory. @@ -9367,7 +9465,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; - bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, @@ -10078,7 +10176,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. // A big-endian target may also explicitly support unaligned accesses - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { + if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) *Fast = true; return true; @@ -10317,10 +10415,10 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { - EVT VT = getValueType(Ty, true); + EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -10664,7 +10762,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
ARMTargetLowering::ConstraintType -ARMTargetLowering::getConstraintType(const std::string &Constraint) const { +ARMTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -10723,10 +10821,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight( } typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; -RCPair -ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const { +RCPair ARMTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { @@ -10974,7 +11070,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); @@ -11083,7 +11179,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); + uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11103,12 +11200,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vst4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. 
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -11122,12 +11220,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; @@ -11135,12 +11234,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; @@ -11427,9 +11527,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( VectorType *VecTy = Shuffles[0]->getType(); Type *EltTy = VecTy->getVectorElementType(); - const DataLayout *DL = getDataLayout(); - unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + const DataLayout &DL = LI->getModule()->getDataLayout(); + unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't // support i64/f64 element). @@ -11439,8 +11539,8 @@ bool ARMTargetLowering::lowerInterleavedLoad( // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) - VecTy = VectorType::get(DL->getIntPtrType(EltTy), - VecTy->getVectorNumElements()); + VecTy = + VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, @@ -11517,9 +11617,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); - const DataLayout *DL = getDataLayout(); - unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + const DataLayout &DL = SI->getModule()->getDataLayout(); + unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; // Skip illegal sub vector types and vector types of i64/f64 element (vstN // doesn't support i64/f64 element). 
@@ -11533,7 +11633,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { - Type *IntTy = DL->getIntPtrType(EltTy); + Type *IntTy = DL.getIntPtrType(EltTy); // Convert to the corresponding integer vector. Type *IntVecTy = diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 74396392f8e3..efc9020c193a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -249,7 +249,8 @@ namespace llvm { } /// getSetCCResultType - Return the value type to use for ISD::SETCC. - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, @@ -286,8 +287,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -324,8 +325,7 @@ namespace llvm { bool ExpandInlineAsm(CallInst *CI) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -334,8 +334,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -345,8 +344,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; else if (ConstraintCode.size() == 2) { @@ -533,7 +532,8 @@ namespace llvm { SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. 
fmuladd intrinsics will be diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b8cac135baf6..61c45af26fe1 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -306,8 +306,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; -def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; +def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; +def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 245c9e869bf6..37352810c99f 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -31,11 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -65,12 +67,18 @@ namespace { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(ID) {} + const MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; const ARMSubtarget *STI; const TargetLowering *TL; ARMFunctionInfo *AFI; - RegScavenger *RS; + LivePhysRegs LiveRegs; + RegisterClassInfo RegClassInfo; + MachineBasicBlock::const_iterator LiveRegPos; + bool LiveRegsValid; + bool RegClassInfoValid; bool isThumb1, isThumb2; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -80,64 +88,60 @@ namespace { } private: + /// A set of load/store MachineInstrs with same base register sorted by + /// offset. struct MemOpQueueEntry { - int Offset; - unsigned Reg; - bool isKill; - unsigned Position; - MachineBasicBlock::iterator MBBI; - bool Merged; - MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, - MachineBasicBlock::iterator i) - : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} + MachineInstr *MI; + int Offset; ///< Load/Store offset. + unsigned Position; ///< Position as counted from end of basic block. + MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) + : MI(MI), Offset(Offset), Position(Position) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; - typedef MemOpQueue::iterator MemOpQueueIter; - void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, - const MemOpQueue &MemOps, unsigned DefReg, - unsigned RangeBegin, unsigned RangeEnd); + /// A set of MachineInstrs that fulfill (nearly all) conditions to get + /// merged into a LDM/STM. + struct MergeCandidate { + /// List of instructions ordered by load/store offset. + SmallVector<MachineInstr*, 4> Instrs; + /// Index in Instrs of the instruction being latest in the schedule. + unsigned LatestMIIdx; + /// Index in Instrs of the instruction being earliest in the schedule. 
+ unsigned EarliestMIIdx; + /// Index into the basic block where the merged instruction will be + /// inserted. (See MemOpQueueEntry.Position) + unsigned InsertPos; + /// Whether the instructions can be merged into a ldm/stm instruction. + bool CanMergeToLSMulti; + /// Whether the instructions can be merged into a ldrd/strd instruction. + bool CanMergeToLSDouble; + }; + SpecificBumpPtrAllocator<MergeCandidate> Allocator; + SmallVector<const MergeCandidate*,4> Candidates; + SmallVector<MachineInstr*,4> MergeBaseCandidates; + + void moveLiveRegsBefore(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator Before); + unsigned findFreeReg(const TargetRegisterClass &RegClass); void UpdateBaseRegUses(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, unsigned WordOffset, + DebugLoc DL, unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg); - bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, unsigned Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, - ArrayRef<std::pair<unsigned, bool> > Regs, - ArrayRef<unsigned> ImpDefs); - void MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &MemOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - unsigned Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, - unsigned Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); + MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; + void FormCandidates(const MemOpQueue &MemOps); + MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); - bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I); - bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I); + bool MergeBaseUpdateLoadStore(MachineInstr *MI); + bool MergeBaseUpdateLSMultiple(MachineInstr *MI); + bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -185,6 +189,14 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { + return MI.getOperand(1); +} + +static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { + return MI.getOperand(0); +} + static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode 
Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -348,6 +360,10 @@ static bool isi32Store(unsigned Opc) { return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); } +static bool isLoadSingle(unsigned Opc) { + return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; +} + static unsigned getImmScale(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Unhandled opcode!"); @@ -365,12 +381,55 @@ static unsigned getImmScale(unsigned Opc) { } } +static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return 0; + case ARM::LDRi12: + case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: + case ARM::t2LDRi8: + case ARM::t2LDRi12: + case ARM::t2STRi8: + case ARM::t2STRi12: + case ARM::VLDRS: + case ARM::VSTRS: + return 4; + case ARM::VLDRD: + case ARM::VSTRD: + return 8; + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::VLDMSIA: + case ARM::VSTMSIA: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; + case ARM::VLDMDIA: + case ARM::VSTMDIA: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; + } +} + /// Update future uses of the base register with the offset introduced /// due to writeback. This function only works on Thumb1. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, + DebugLoc DL, unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); @@ -398,7 +457,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, Offset = MO.getImm() - WordOffset * getImmScale(Opc); // If storing the base register, it needs to be reset first. - unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); @@ -439,7 +498,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, if (InsertSub) { // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); return; } @@ -457,31 +516,65 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, // See PR21029. if (MBBI != MBB.end()) --MBBI; AddDefaultT1CC( - BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); } } +/// Return the first register of class \p RegClass that is not in \p Regs. +unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { + if (!RegClassInfoValid) { + RegClassInfo.runOnMachineFunction(*MF); + RegClassInfoValid = true; + } + + for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) + if (!LiveRegs.contains(Reg)) + return Reg; + return 0; +} + +/// Compute live registers just before instruction \p Before (in normal schedule +/// direction). 
Computes backwards so multiple queries in the same block must +/// come in reverse order. +void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator Before) { + // Initialize if we never queried in this block. + if (!LiveRegsValid) { + LiveRegs.init(TRI); + LiveRegs.addLiveOuts(&MBB, true); + LiveRegPos = MBB.end(); + LiveRegsValid = true; + } + // Move backward just before the "Before" position. + while (LiveRegPos != Before) { + --LiveRegPos; + LiveRegs.stepBackward(*LiveRegPos); + } +} + +static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, + unsigned Reg) { + for (const std::pair<unsigned, bool> &R : Regs) + if (R.first == Reg) + return true; + return false; +} + /// Create and insert a LDM or STM with Base as base register and registers in /// Regs as the register operands that would be loaded / stored. It returns /// true if the transformation is done. -bool -ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, - unsigned Opcode, ARMCC::CondCodes Pred, - unsigned PredReg, unsigned Scratch, DebugLoc dl, - ArrayRef<std::pair<unsigned, bool> > Regs, - ArrayRef<unsigned> ImpDefs) { - // Only a single register to load / store. Don't bother. +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { unsigned NumRegs = Regs.size(); - if (NumRegs <= 1) - return false; + assert(NumRegs > 1); // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. // Compute liveness information for that register to make the decision. bool SafeToClobberCPSR = !isThumb1 || - (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == MachineBasicBlock::LQR_Dead); bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. @@ -489,17 +582,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Exception: If the base register is in the input reglist, Thumb1 LDM is // non-writeback. // It's also not possible to merge an STR of the base register in Thumb1. - if (isThumb1) - for (const std::pair<unsigned, bool> &R : Regs) - if (Base == R.first) { - assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); - if (Opcode == ARM::tLDRi) { - Writeback = false; - break; - } else if (Opcode == ARM::tSTRi) { - return false; - } - } + if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { + assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); + if (Opcode == ARM::tLDRi) { + Writeback = false; + } else if (Opcode == ARM::tSTRi) { + return nullptr; + } + } ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. @@ -516,18 +606,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { // Check if this is a supported opcode before inserting instructions to // calculate a new base register. - if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; + if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. 
if (NumRegs <= 2) - return false; + return nullptr; // On Thumb1, it's not worth materializing a new base register without // clobbering the CPSR (i.e. not using ADDS/SUBS). if (!SafeToClobberCPSR) - return false; + return nullptr; unsigned NewBase; if (isi32Load(Opcode)) { @@ -535,10 +625,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // use as the new base. NewBase = Regs[NumRegs-1].first; } else { - // Use the scratch register to use as a new base. - NewBase = Scratch; + // Find a free register that we can use as scratch register. + moveLiveRegsBefore(MBB, InsertBefore); + // The merged instruction does not exist yet but will use several Regs if + // it is a Store. + if (!isLoadSingle(Opcode)) + for (const std::pair<unsigned, bool> &R : Regs) + LiveRegs.addReg(R.first); + + NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); if (NewBase == 0) - return false; + return nullptr; } int BaseOpc = @@ -557,7 +654,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (!TL->isLegalAddImmediate(Offset)) // FIXME: Try add with register operand? - return false; // Probably not worth it then. + return nullptr; // Probably not worth it then. + + // We can only append a kill flag to the add/sub input if the value is not + // used in the register list of the stm as well. + bool KillOldBase = BaseKill && + (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); if (isThumb1) { // Thumb1: depending on immediate size, use either @@ -572,43 +674,44 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, !STI->hasV6Ops()) { // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr if (Pred != ARMCC::AL) - return false; - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) - .addReg(Base, getKillRegState(BaseKill)); + return nullptr; + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(KillOldBase)); } else - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(BaseKill)) + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(KillOldBase)) .addImm(Pred).addReg(PredReg); - // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + // The following ADDS/SUBS becomes an update. Base = NewBase; - BaseKill = false; + KillOldBase = true; } if (BaseOpc == ARM::tADDrSPi) { assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) .addImm(Pred).addReg(PredReg); } else - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + AddDefaultT1CC( + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } Base = NewBase; BaseKill = true; // New base is always killed straight away. } - bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || - Opcode == ARM::VLDRD); + bool isDef = isLoadSingle(Opcode); // Get LS multiple opcode. 
Note that for Thumb1 this might be an opcode with // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); - if (!Opcode) return false; + if (!Opcode) + return nullptr; // Check if a Thumb1 LDM/STM merge is safe. This is the case if: // - There is no writeback (LDM of base register), @@ -619,7 +722,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // It's safe to return here since the code to materialize a new base register // above is also conditional on SafeToClobberCPSR. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) - return false; + return nullptr; MachineInstrBuilder MIB; @@ -628,7 +731,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Update tLDMIA with writeback if necessary. Opcode = ARM::tLDMIA_UPD; - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); // Thumb1: we might need to set base writeback when building the MI. MIB.addReg(Base, getDefRegState(true)) @@ -637,381 +740,257 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // The base isn't dead after a merged instruction with writeback. // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) - UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); } else { // No writeback, simply build the MachineInstr. - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); MIB.addReg(Base, getKillRegState(BaseKill)); } MIB.addImm(Pred).addReg(PredReg); for (const std::pair<unsigned, bool> &R : Regs) - MIB = MIB.addReg(R.first, getDefRegState(isDef) - | getKillRegState(R.second)); + MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); - // Add implicit defs for super-registers. - for (unsigned ImpDef : ImpDefs) - MIB.addReg(ImpDef, RegState::ImplicitDefine); - - return true; + return MIB.getInstr(); } -/// Find all instructions using a given imp-def within a range. -/// -/// We are trying to combine a range of instructions, one of which (located at -/// position RangeBegin) implicitly defines a register. The final LDM/STM will -/// be placed at RangeEnd, and so any uses of this definition between RangeStart -/// and RangeEnd must be modified to use an undefined value. -/// -/// The live range continues until we find a second definition or one of the -/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so -/// we must consider all uses and decide which are relevant in a second pass. -void ARMLoadStoreOpt::findUsesOfImpDef( - SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, - unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { - std::map<unsigned, MachineOperand *> Uses; - unsigned LastLivePos = RangeEnd; - - // First we find all uses of this register with Position between RangeBegin - // and RangeEnd, any or all of these could be uses of a definition at - // RangeBegin. We also record the latest position a definition at RangeBegin - // would be considered live. - for (unsigned i = 0; i < MemOps.size(); ++i) { - MachineInstr &MI = *MemOps[i].MBBI; - unsigned MIPosition = MemOps[i].Position; - if (MIPosition <= RangeBegin || MIPosition > RangeEnd) - continue; - - // If this instruction defines the register, then any later use will be of - // that definition rather than ours. 
- if (MI.definesRegister(DefReg)) - LastLivePos = std::min(LastLivePos, MIPosition); - - MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); - if (!UseOp) - continue; - - // If this instruction kills the register then (assuming liveness is - // correct when we start) we don't need to think about anything after here. - if (UseOp->isKill()) - LastLivePos = std::min(LastLivePos, MIPosition); - - Uses[MIPosition] = UseOp; - } - - // Now we traverse the list of all uses, and append the ones that actually use - // our definition to the requested list. - for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), - E = Uses.end(); - I != E; ++I) { - // List is sorted by position so once we've found one out of range there - // will be no more to consider. - if (I->first > LastLivePos) - break; - UsesOfImpDefs.push_back(I->second); +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { + bool IsLoad = isi32Load(Opcode); + assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); + unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; + + assert(Regs.size() == 2); + MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, + TII->get(LoadStoreOpcode)); + if (IsLoad) { + MIB.addReg(Regs[0].first, RegState::Define) + .addReg(Regs[1].first, RegState::Define); + } else { + MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(Regs[1].first, getKillRegState(Regs[1].second)); } + MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + return MIB.getInstr(); } /// Call MergeOps and update MemOps and merges accordingly on success. -void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, unsigned memOpsEnd, - unsigned insertAfter, int Offset, - unsigned Base, bool BaseKill, - unsigned Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { - // First calculate which of the registers should be killed by the merged - // instruction. - const unsigned insertPos = memOps[insertAfter].Position; - SmallSet<unsigned, 4> KilledRegs; - DenseMap<unsigned, unsigned> Killer; - for (unsigned i = 0, e = memOps.size(); i != e; ++i) { - if (i == memOpsBegin) { - i = memOpsEnd; - if (i == e) - break; - } - if (memOps[i].Position < insertPos && memOps[i].isKill) { - unsigned Reg = memOps[i].Reg; +MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { + const MachineInstr *First = Cand.Instrs.front(); + unsigned Opcode = First->getOpcode(); + bool IsLoad = isLoadSingle(Opcode); + SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 4> ImpDefs; + DenseSet<unsigned> KilledRegs; + // Determine list of registers and list of implicit super-register defs. + for (const MachineInstr *MI : Cand.Instrs) { + const MachineOperand &MO = getLoadStoreRegOp(*MI); + unsigned Reg = MO.getReg(); + bool IsKill = MO.isKill(); + if (IsKill) KilledRegs.insert(Reg); + Regs.push_back(std::make_pair(Reg, IsKill)); + + if (IsLoad) { + // Collect any implicit defs of super-registers, after merging we can't + // be sure anymore that we properly preserved these live ranges and must + // remove these implicit operands.
+ for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + assert(MO.isImplicit()); + unsigned DefReg = MO.getReg(); + + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) + continue; + // We can ignore cases where the super-reg is read and written. + if (MI->readsRegister(DefReg)) + continue; + ImpDefs.push_back(DefReg); + } } } - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0); - if (TransferOp.isUse() && TransferOp.getReg() == Base) - BaseKill = false; + // Attempt the merge. + typedef MachineBasicBlock::iterator iterator; + MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; + iterator InsertBefore = std::next(iterator(LatestMI)); + MachineBasicBlock &MBB = *LatestMI->getParent(); + unsigned Offset = getMemoryOpOffset(First); + unsigned Base = getLoadStoreBaseOp(*First).getReg(); + bool BaseKill = LatestMI->killsRegister(Base); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); + DebugLoc DL = First->getDebugLoc(); + MachineInstr *Merged = nullptr; + if (Cand.CanMergeToLSDouble) + Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, + Opcode, Pred, PredReg, DL, Regs); + if (!Merged && Cand.CanMergeToLSMulti) + Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, + Opcode, Pred, PredReg, DL, Regs); + if (!Merged) + return nullptr; + + // Determine earliest instruction that will get removed. We then keep an + // iterator just above it so the following erases don't invalidate it. + iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); + bool EarliestAtBegin = false; + if (EarliestI == MBB.begin()) { + EarliestAtBegin = true; + } else { + EarliestI = std::prev(EarliestI); } - SmallVector<std::pair<unsigned, bool>, 8> Regs; - SmallVector<unsigned, 8> ImpDefs; - SmallVector<MachineOperand *, 8> UsesOfImpDefs; - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - unsigned Reg = memOps[i].Reg; - // If we are inserting the merged operation after an operation that - // uses the same register, make sure to transfer any kill flag. - bool isKill = memOps[i].isKill || KilledRegs.count(Reg); - Regs.push_back(std::make_pair(Reg, isKill)); - - // Collect any implicit defs of super-registers. They must be preserved. - for (const MachineOperand &MO : memOps[i].MBBI->operands()) { - if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) - continue; - unsigned DefReg = MO.getReg(); - if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) - ImpDefs.push_back(DefReg); - - // There may be other uses of the definition between this instruction and - // the eventual LDM/STM position. These should be marked undef if the - // merge takes place. - findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, - insertPos); + // Remove instructions which have been merged. + for (MachineInstr *MI : Cand.Instrs) + MBB.erase(MI); + + // Determine range between the earliest removed instruction and the new one. + if (EarliestAtBegin) + EarliestI = MBB.begin(); + else + EarliestI = std::next(EarliestI); + auto FixupRange = make_range(EarliestI, iterator(Merged)); + + if (isLoadSingle(Opcode)) { + // If the previous loads defined a super-reg, then we have to mark earlier + // operands undef; Replicate the super-reg def on the merged instruction.
+ for (MachineInstr &MI : FixupRange) { + for (unsigned &ImpDefReg : ImpDefs) { + for (MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || MO.getReg() != ImpDefReg) + continue; + if (MO.readsReg()) + MO.setIsUndef(); + else if (MO.isDef()) + ImpDefReg = 0; + } + } } - } - // Try to do the merge. - MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - ++Loc; - if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs, ImpDefs)) - return; - - // Merge succeeded, update records. - Merges.push_back(std::prev(Loc)); - - // In gathering loads together, we may have moved the imp-def of a register - // past one of its uses. This is OK, since we know better than the rest of - // LLVM what's OK with ARM loads and stores; but we still have to adjust the - // affected uses. - for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), - E = UsesOfImpDefs.end(); - I != E; ++I) - (*I)->setIsUndef(); - - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - // Remove kill flags from any memops that come before insertPos. - if (Regs[i-memOpsBegin].second) { - unsigned Reg = Regs[i-memOpsBegin].first; - if (KilledRegs.count(Reg)) { - unsigned j = Killer[Reg]; - int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); - assert(Idx >= 0 && "Cannot find killing operand"); - memOps[j].MBBI->getOperand(Idx).setIsKill(false); - memOps[j].isKill = false; + MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); + for (unsigned ImpDef : ImpDefs) + MIB.addReg(ImpDef, RegState::ImplicitDefine); + } else { + // Remove kill flags: We are possibly storing the values later now. + assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); + for (MachineInstr &MI : FixupRange) { + for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.isKill()) + continue; + if (KilledRegs.count(MO.getReg())) + MO.setIsKill(false); } - memOps[i].isKill = true; } - MBB.erase(memOps[i].MBBI); - // Update this memop to refer to the merged instruction. - // We may need to move kill flags again. - memOps[i].Merged = true; - memOps[i].MBBI = Merges.back(); - memOps[i].Position = insertPos; + assert(ImpDefs.empty()); } - // Update memOps offsets, since they may have been modified by MergeOps. - for (auto &MemOp : memOps) { - MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); - } + return Merged; } -/// Merge a number of load / store instructions into one or more load / store -/// multiple instructions. -void -ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, unsigned Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { - bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - int Offset = MemOps[SIndex].Offset; - int SOffset = Offset; - unsigned insertAfter = SIndex; - MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; - DebugLoc dl = Loc->getDebugLoc(); - const MachineOperand &PMO = Loc->getOperand(0); - unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); - unsigned Count = 1; - unsigned Limit = ~0U; - bool BaseKill = false; - // vldm / vstm limit are 32 for S variants, 16 for D variants. +static bool isValidLSDoubleOffset(int Offset) { + unsigned Value = abs(Offset); + // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally + // multiplied by 4. 
+ return (Value % 4) == 0 && Value < 1024; +} - switch (Opcode) { - default: break; - case ARM::VSTRS: - Limit = 32; - break; - case ARM::VSTRD: - Limit = 16; - break; - case ARM::VLDRD: - Limit = 16; - break; - case ARM::VLDRS: - Limit = 32; - break; - } +/// Find candidates for load/store multiple merge in list of MemOpQueueEntries. +void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { + const MachineInstr *FirstMI = MemOps[0].MI; + unsigned Opcode = FirstMI->getOpcode(); + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); + unsigned Size = getLSMultipleTransferSize(FirstMI); + + unsigned SIndex = 0; + unsigned EIndex = MemOps.size(); + do { + // Look at the first instruction. + const MachineInstr *MI = MemOps[SIndex].MI; + int Offset = MemOps[SIndex].Offset; + const MachineOperand &PMO = getLoadStoreRegOp(*MI); + unsigned PReg = PMO.getReg(); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); + unsigned Latest = SIndex; + unsigned Earliest = SIndex; + unsigned Count = 1; + bool CanMergeToLSDouble = + STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); + // ARM errata 602117: LDRD with base in list may result in incorrect base + // register when interrupted or faulted. + if (STI->isCortexM3() && isi32Load(Opcode) && + PReg == getLoadStoreBaseOp(*MI).getReg()) + CanMergeToLSDouble = false; + + bool CanMergeToLSMulti = true; + // On swift vldm/vstm starting with an odd register number as that needs + // more uops than single vldrs. + if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) + CanMergeToLSMulti = false; + + // Merge following instructions where possible. + for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { + int NewOffset = MemOps[I].Offset; + if (NewOffset != Offset + (int)Size) + break; + const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); + unsigned Reg = MO.getReg(); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); + + // See if the current load/store may be part of a multi load/store. + bool PartOfLSMulti = CanMergeToLSMulti; + if (PartOfLSMulti) { + // Cannot load from SP + if (Reg == ARM::SP) + PartOfLSMulti = false; + // Register numbers must be in ascending order. + else if (RegNum <= PRegNum) + PartOfLSMulti = false; + // For VFP / NEON load/store multiples, the registers must be + // consecutive and within the limit on the number of registers per + // instruction. + else if (!isNotVFP && RegNum != PRegNum+1) + PartOfLSMulti = false; + } + // See if the current load/store may be part of a double load/store. + bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; - for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { - int NewOffset = MemOps[i].Offset; - const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); - // Register numbers must be in ascending order. For VFP / NEON load and - // store multiples, the registers must also be consecutive and within the - // limit on the number of registers per instruction. - if (Reg != ARM::SP && - NewOffset == Offset + (int)Size && - ((isNotVFP && RegNum > PRegNum) || - ((Count < Limit) && RegNum == PRegNum+1)) && - // On Swift we don't want vldm/vstm to start with a odd register num - // because Q register unaligned vldm/vstm need more uops. 
- (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { + if (!PartOfLSMulti && !PartOfLSDouble) + break; + CanMergeToLSMulti &= PartOfLSMulti; + CanMergeToLSDouble &= PartOfLSDouble; + // Track MemOp with latest and earliest position (Positions are + // counted in reverse). + unsigned Position = MemOps[I].Position; + if (Position < MemOps[Latest].Position) + Latest = I; + else if (Position > MemOps[Earliest].Position) + Earliest = I; + // Prepare for next MemOp. Offset += Size; PRegNum = RegNum; - ++Count; - } else { - // Can't merge this in. Try merge the earlier ones first. - // We need to compute BaseKill here because the MemOps may have been - // reordered. - BaseKill = Loc->killsRegister(Base); - - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, - BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, - MemOps, Merges); - return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) { - insertAfter = i; - Loc = MemOps[i].MBBI; - } - } - - BaseKill = Loc->killsRegister(Base); - MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, - Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); -} - -static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tSUBi8: - case ARM::t2SUBri: - case ARM::SUBri: - CheckCPSRDef = true; - break; - case ARM::tSUBspi: - break; - } - - // Make sure the offset fits in 8 bits. - if (Bytes == 0 || (Limit && Bytes >= Limit)) - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || - MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - -static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tADDi8: - case ARM::t2ADDri: - case ARM::ADDri: - CheckCPSRDef = true; - break; - case ARM::tADDspi: - break; - } - - if (Bytes == 0 || (Limit && Bytes >= Limit)) - // Make sure the offset fits in 8 bits. - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tADDspi || - MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? 
!definesCPSR(MI) : true; -} - -static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { - switch (MI->getOpcode()) { - default: return 0; - case ARM::LDRi12: - case ARM::STRi12: - case ARM::tLDRi: - case ARM::tSTRi: - case ARM::tLDRspi: - case ARM::tSTRspi: - case ARM::t2LDRi8: - case ARM::t2LDRi12: - case ARM::t2STRi8: - case ARM::t2STRi12: - case ARM::VLDRS: - case ARM::VSTRS: - return 4; - case ARM::VLDRD: - case ARM::VSTRD: - return 8; - case ARM::LDMIA: - case ARM::LDMDA: - case ARM::LDMDB: - case ARM::LDMIB: - case ARM::STMIA: - case ARM::STMDA: - case ARM::STMDB: - case ARM::STMIB: - case ARM::tLDMIA: - case ARM::tLDMIA_UPD: - case ARM::tSTMIA_UPD: - case ARM::t2LDMIA: - case ARM::t2LDMDB: - case ARM::t2STMIA: - case ARM::t2STMDB: - case ARM::VLDMSIA: - case ARM::VSTMSIA: - return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; - case ARM::VLDMDIA: - case ARM::VSTMDIA: - return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; - } + // Form a candidate from the Ops collected so far. + MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; + for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) + Candidate->Instrs.push_back(MemOps[C].MI); + Candidate->LatestMIIdx = Latest - SIndex; + Candidate->EarliestMIIdx = Earliest - SIndex; + Candidate->InsertPos = MemOps[Latest].Position; + if (Count == 1) + CanMergeToLSMulti = CanMergeToLSDouble = false; + Candidate->CanMergeToLSMulti = CanMergeToLSMulti; + Candidate->CanMergeToLSDouble = CanMergeToLSDouble; + Candidates.push_back(Candidate); + // Continue after the chain. + SIndex += Count; + } while (SIndex < EIndex); } static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, @@ -1081,6 +1060,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } +/// Check if the given instruction increments or decrements a register and +/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags +/// generated by the instruction are possibly read as well. +static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg) { + bool CheckCPSRDef; + int Scale; + switch (MI.getOpcode()) { + case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; + case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; + case ARM::t2SUBri: + case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; + case ARM::t2ADDri: + case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; + case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; + case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; + default: return 0; + } + + unsigned MIPredReg; + if (MI.getOperand(0).getReg() != Reg || + MI.getOperand(1).getReg() != Reg || + getInstrPredicate(&MI, MIPredReg) != Pred || + MIPredReg != PredReg) + return 0; + + if (CheckCPSRDef && definesCPSR(&MI)) + return 0; + return MI.getOperand(2).getImm() * Scale; +} + +/// Searches for an increment or decrement of \p Reg before \p MBBI. +static MachineBasicBlock::iterator +findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { + Offset = 0; + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (MBBI == BeginMBBI) + return EndMBBI; + + // Skip debug values. 
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); + while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) + --PrevMBBI; + + Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); + return Offset == 0 ? EndMBBI : PrevMBBI; +} + +/// Searches for an increment or decrement of \p Reg after \p MBBI. +static MachineBasicBlock::iterator +findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { + Offset = 0; + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineBasicBlock::iterator EndMBBI = MBB.end(); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); + // Skip debug values. + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; + if (NextMBBI == EndMBBI) + return EndMBBI; + + Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); + return Offset == 0 ? EndMBBI : NextMBBI; +} + /// Fold proceeding/trailing inc/dec of base register into the /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// @@ -1093,21 +1141,17 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, /// ldmia rn, <ra, rb, rc> /// => /// ldmdb rn!, <ra, rb, rc> -bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Thumb1 is already using updating loads/stores. if (isThumb1) return false; - MachineInstr *MI = MBBI; - unsigned Base = MI->getOperand(0).getReg(); - bool BaseKill = MI->getOperand(0).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); + const MachineOperand &BaseOP = MI->getOperand(0); + unsigned Base = BaseOP.getReg(); + bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); unsigned Opcode = MI->getOpcode(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); // Can't use an updating ld/st if the base register is also a dest // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. @@ -1115,55 +1159,27 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; - bool DoMerge = false; + int Bytes = getLSMultipleTransferSize(MI); + MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); - - // Try merging with the previous instruction. - MachineBasicBlock::iterator BeginMBBI = MBB.begin(); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } else if (Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::da; - DoMerge = true; - } - if (DoMerge) - MBB.erase(PrevMBBI); - } - - // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - if (DoMerge) { - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - } + if (Mode == ARM_AM::ia && Offset == -Bytes) { + Mode = ARM_AM::db; + } else if (Mode == ARM_AM::ib && Offset == -Bytes) { + Mode = ARM_AM::da; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) + return false; } - - if (!DoMerge) - return false; + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); @@ -1231,21 +1247,15 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, /// Fold proceeding/trailing inc/dec of base register into the /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: -bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // Thumb1 doesn't have updating LDR/STR. // FIXME: Use LDM/STM with single register instead. if (isThumb1) return false; - MachineInstr *MI = MBBI; - unsigned Base = MI->getOperand(1).getReg(); - bool BaseKill = MI->getOperand(1).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); + unsigned Base = getLoadStoreBaseOp(*MI).getReg(); + bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); unsigned Opcode = MI->getOpcode(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); @@ -1255,7 +1265,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) return false; - bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. if (MI->getOperand(0).getReg() == Base) @@ -1263,64 +1272,38 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - bool DoMerge = false; - ARM_AM::AddrOpc AddSub = ARM_AM::add; - unsigned NewOpc = 0; - // AM2 - 12 bits, thumb2 - 8 bits. - unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); - - // Try merging with the previous instruction. 
- MachineBasicBlock::iterator BeginMBBI = MBB.begin(); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { - DoMerge = true; - AddSub = ARM_AM::sub; - } else if (!isAM5 && - isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { - DoMerge = true; - } - if (DoMerge) { - NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); - MBB.erase(PrevMBBI); - } - } - - // Try merging with the next instruction. - MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if (!isAM5 && - isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { - DoMerge = true; - AddSub = ARM_AM::sub; - } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { - DoMerge = true; - } - if (DoMerge) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - } + int Bytes = getLSMultipleTransferSize(MI); + MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); + unsigned NewOpc; + if (!isAM5 && Offset == Bytes) { + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + } else if (Offset == -Bytes) { + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == Bytes) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + } else if (!isAM5 && Offset == -Bytes) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + } else + return false; } + MBB.erase(MergeInstr); - if (!DoMerge) - return false; + ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; + bool isLd = isLoadSingle(Opcode); if (isAM5) { // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) MachineOperand &MO = MI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(isLd ? BaseKill : false)) .addImm(Pred).addReg(PredReg) @@ -1330,20 +1313,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM2) { // LDR_PRE, LDR_POST if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { - int Offset = AddSub == ARM_AM::sub ? 
-Bytes : Bytes; - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } else { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2LDR_PRE, t2LDR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } @@ -1353,15 +1334,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // the vestigal zero-reg offset register. When that's fixed, this clause // can be removed entirely. if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } @@ -1371,6 +1351,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return true; } +bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && + "Must have t2STRDi8 or t2LDRDi8"); + if (MI.getOperand(3).getImm() != 0) + return false; + + // Behaviour for writeback is undefined if base register is the same as one + // of the others. + const MachineOperand &BaseOp = MI.getOperand(2); + unsigned Base = BaseOp.getReg(); + const MachineOperand &Reg0Op = MI.getOperand(0); + const MachineOperand &Reg1Op = MI.getOperand(1); + if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) + return false; + + unsigned PredReg; + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + MachineBasicBlock::iterator MBBI(MI); + MachineBasicBlock &MBB = *MI.getParent(); + int Offset; + MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, + PredReg, Offset); + unsigned NewOpc; + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? 
ARM::t2LDRD_POST : ARM::t2STRD_POST; + } else + return false; + } + MBB.erase(MergeInstr); + + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); + if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { + MIB.addOperand(Reg0Op).addOperand(Reg1Op) + .addReg(BaseOp.getReg(), RegState::Define); + } else { + assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); + MIB.addReg(BaseOp.getReg(), RegState::Define) + .addOperand(Reg0Op).addOperand(Reg1Op); + } + MIB.addReg(BaseOp.getReg(), RegState::Kill) + .addImm(Offset).addImm(Pred).addReg(PredReg); + assert(TII->get(Opcode).getNumOperands() == 6 && + TII->get(NewOpc).getNumOperands() == 7 && + "Unexpected number of operands in Opcode specification."); + + // Transfer implicit operands. + for (const MachineOperand &MO : MI.implicit_operands()) + MIB.addOperand(MO); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MBB.erase(MBBI); + return true; +} + /// Returns true if instruction is a memory operation that this pass is capable /// of operating on. static bool isMemoryOp(const MachineInstr *MI) { @@ -1426,26 +1466,10 @@ static bool isMemoryOp(const MachineInstr *MI) { return false; } -/// Advance register scavenger to just before the earliest memory op that is -/// being merged. -void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { - MachineBasicBlock::iterator Loc = MemOps[0].MBBI; - unsigned Position = MemOps[0].Position; - for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { - if (MemOps[i].Position < Position) { - Position = MemOps[i].Position; - Loc = MemOps[i].MBBI; - } - } - - if (Loc != MBB.begin()) - RS->forward(std::prev(Loc)); -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, - DebugLoc dl, unsigned NewOpc, + DebugLoc DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, bool OffKill, bool OffUndef, @@ -1491,7 +1515,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if (!Errata602117 && !NonConsecutiveRegs) return false; - MachineBasicBlock::iterator NewBBI = MBBI; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? @@ -1531,7 +1554,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } - NewBBI = std::prev(MBBI); } else { // Split into two instructions. 
unsigned NewOpc = (isLd) @@ -1553,7 +1575,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1573,7 +1594,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1585,191 +1605,160 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBB.erase(MI); - MBBI = NewBBI; + MBBI = MBB.erase(MBBI); return true; } /// An optimization pass to turn multiple LDR / STR ops of the same base and /// incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { - unsigned NumMerges = 0; - unsigned NumMemOps = 0; MemOpQueue MemOps; unsigned CurrBase = 0; unsigned CurrOpc = ~0u; - unsigned CurrSize = 0; ARMCC::CondCodes CurrPred = ARMCC::AL; - unsigned CurrPredReg = 0; unsigned Position = 0; - SmallVector<MachineBasicBlock::iterator,4> Merges; - - RS->enterBasicBlock(&MBB); - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { + assert(Candidates.size() == 0); + assert(MergeBaseCandidates.size() == 0); + LiveRegsValid = false; + + for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); + I = MBBI) { + // The instruction in front of the iterator is the one we look at. + MBBI = std::prev(I); if (FixInvalidRegPairOp(MBB, MBBI)) continue; + ++Position; - bool Advance = false; - bool TryMerge = false; - - bool isMemOp = isMemoryOp(MBBI); - if (isMemOp) { + if (isMemoryOp(MBBI)) { unsigned Opcode = MBBI->getOpcode(); - unsigned Size = getLSMultipleTransferSize(MBBI); const MachineOperand &MO = MBBI->getOperand(0); unsigned Reg = MO.getReg(); - bool isKill = MO.isDef() ? false : MO.isKill(); - unsigned Base = MBBI->getOperand(1).getReg(); + unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); - // Watch out for: - // r4 := ldr [r5] - // r5 := ldr [r5, #4] - // r6 := ldr [r5, #8] - // - // The second ldr has effectively broken the chain even though it - // looks like the later ldr(s) use the same base register. Try to - // merge the ldr's so far, including this one. But don't try to - // combine the following ldr(s). - bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg(); - - // Watch out for: - // r4 := ldr [r0, #8] - // r4 := ldr [r0, #4] - // - // The optimization may reorder the second ldr in front of the first - // ldr, which violates write after write(WAW) dependence. The same as - // str. Try to merge inst(s) already in MemOps. - bool Overlap = false; - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { - if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { - Overlap = true; - break; - } - } - - if (CurrBase == 0 && !Clobber) { + if (CurrBase == 0) { // Start of a new chain. 
CurrBase = Base; CurrOpc = Opcode; - CurrSize = Size; CurrPred = Pred; - CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); - ++NumMemOps; - Advance = true; - } else if (!Overlap) { - if (Clobber) { - TryMerge = true; - Advance = true; + MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + continue; + } + // Note: No need to match PredReg in the next if. + if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // or + // r0 := ldr [r0] + // If a load overrides the base register or a register loaded by + // another load in our chain, we cannot take this instruction. + bool Overlap = false; + if (isLoadSingle(Opcode)) { + Overlap = (Base == Reg); + if (!Overlap) { + for (const MemOpQueueEntry &E : MemOps) { + if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + } } - if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { - // No need to match PredReg. - // Continue adding to the queue. + if (!Overlap) { + // Check offset and sort memory operation into the current chain. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; - Advance = true; + MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + continue; } else { - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); - I != E; ++I) { - if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; - Advance = true; + MemOpQueue::iterator MI, ME; + for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { + if (Offset < MI->Offset) { + // Found a place to insert. break; - } else if (Offset == I->Offset) { - // Collision! This can't be merged! + } + if (Offset == MI->Offset) { + // Collision, abort. + MI = ME; break; } } + if (MI != MemOps.end()) { + MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); + continue; + } } } } - } - if (MBBI->isDebugValue()) { - ++MBBI; - if (MBBI == E) - // Reach the end of the block, try merging the memory instructions. - TryMerge = true; - } else if (Advance) { - ++Position; - ++MBBI; - if (MBBI == E) - // Reach the end of the block, try merging the memory instructions. - TryMerge = true; - } else { - TryMerge = true; + // Don't advance the iterator; The op will start a new chain next. + MBBI = I; + --Position; + // Fallthrough to look into existing chain. + } else if (MBBI->isDebugValue()) { + continue; + } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || + MBBI->getOpcode() == ARM::t2STRDi8) { + // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions + // remember them because we may still be able to merge add/sub into them. + MergeBaseCandidates.push_back(MBBI); } - if (TryMerge) { - if (NumMemOps > 1) { - // Try to find a free register to use as a new base in case it's needed. - // First advance to the instruction just before the start of the chain. - AdvanceRS(MBB, MemOps); - - // Find a scratch register. - unsigned Scratch = - RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); - - // Process the load / store instructions. - RS->forward(std::prev(MBBI)); - - // Merge ops. - Merges.clear(); - MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, - CurrPred, CurrPredReg, Scratch, MemOps, Merges); - - // Try folding preceding/trailing base inc/dec into the generated - // LDM/STM ops. 
- for (unsigned i = 0, e = Merges.size(); i < e; ++i) - if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) - ++NumMerges; - NumMerges += Merges.size(); - - // Try folding preceding/trailing base inc/dec into those load/store - // that were not merged to form LDM/STM ops. - for (unsigned i = 0; i != NumMemOps; ++i) - if (!MemOps[i].Merged) - if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) - ++NumMerges; - - // RS may be pointing to an instruction that's deleted. - RS->skipTo(std::prev(MBBI)); - } else if (NumMemOps == 1) { - // Try folding preceding/trailing base inc/dec into the single - // load/store. - if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { - ++NumMerges; - RS->forward(std::prev(MBBI)); - } - } + // If we are here then the chain is broken; Extract candidates for a merge. + if (MemOps.size() > 0) { + FormCandidates(MemOps); + // Reset for the next chain. CurrBase = 0; CurrOpc = ~0u; - CurrSize = 0; CurrPred = ARMCC::AL; - CurrPredReg = 0; - if (NumMemOps) { - MemOps.clear(); - NumMemOps = 0; - } + MemOps.clear(); + } + } + if (MemOps.size() > 0) + FormCandidates(MemOps); - // If iterator hasn't been advanced and this is not a memory op, skip it. - // It can't start a new chain anyway. - if (!Advance && !isMemOp && MBBI != E) { - ++Position; - ++MBBI; + // Sort candidates so they get processed from end to begin of the basic + // block later; This is necessary for liveness calculation. + auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { + return M0->InsertPos < M1->InsertPos; + }; + std::sort(Candidates.begin(), Candidates.end(), LessThan); + + // Go through list of candidates and merge. + bool Changed = false; + for (const MergeCandidate *Candidate : Candidates) { + if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { + MachineInstr *Merged = MergeOpsUpdate(*Candidate); + // Merge preceding/trailing base inc/dec into the merged op. + if (Merged) { + Changed = true; + unsigned Opcode = Merged->getOpcode(); + if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) + MergeBaseUpdateLSDouble(*Merged); + else + MergeBaseUpdateLSMultiple(Merged); + } else { + for (MachineInstr *MI : Candidate->Instrs) { + if (MergeBaseUpdateLoadStore(MI)) + Changed = true; + } } + } else { + assert(Candidate->Instrs.size() == 1); + if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) + Changed = true; } } - return NumMerges > 0; + Candidates.clear(); + // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 
+ for (MachineInstr *MI : MergeBaseCandidates) + MergeBaseUpdateLSDouble(*MI); + MergeBaseCandidates.clear(); + + return Changed; } /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") @@ -1814,12 +1803,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + MF = &Fn; STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TL = STI->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); - RS = new RegScavenger(); + MRI = &Fn.getRegInfo(); + RegClassInfoValid = false; isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; @@ -1832,7 +1823,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { Modified |= MergeReturnIntoLDM(MBB); } - delete RS; + Allocator.DestroyAll(); return Modified; } @@ -2219,7 +2210,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { continue; int Opc = MI->getOpcode(); - bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; + bool isLd = isLoadSingle(Opc); unsigned Base = MI->getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index a59cf9851108..6cafbbb9f8eb 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" -ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { -} - // Emit, if possible, a specialized version of the given Libcall. Typically this // means selecting the appropriately aligned version, but we also convert memset // of 0 into memclr. 
@@ -83,7 +77,7 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext()); + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); if (AEABILibcall == AEABI_MEMCLR) { @@ -121,12 +115,14 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } }; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 1db190f41e1a..289879ee1d7e 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -37,8 +37,6 @@ namespace ARM_AM { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit ARMSelectionDAGInfo(const DataLayout &DL); - ~ARMSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 55808dfb9efe..002c3e9b6291 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -112,7 +112,6 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), - TSInfo(*TM.getDataLayout()), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. @@ -172,6 +171,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + GenLongCalls = false; UnsafeFPMath = false; } @@ -286,7 +286,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. @@ -294,34 +294,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, return false; return true; } else { - if (RelocM == Reloc::PIC_) { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; + // If this is a strong reference to a definition, it is definitely not + // through a stub. 
+ if (isDef) + return false; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return true; + if (RelocM == Reloc::PIC_) { // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. - if (isDecl || GV->hasCommonLinkage()) + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) // Hidden $non_lazy_ptr reference. return true; - - return false; - } else { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 9909a6a6d198..dd101df9b63d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -206,6 +206,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Generate calls via indirect call instructions. + bool GenLongCalls; + /// Target machine allowed unsafe FP math (such as use of NEON fp) bool UnsafeFPMath; @@ -342,6 +345,7 @@ public: bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } bool useNaClTrap() const { return UseNaClTrap; } + bool genLongCalls() const { return GenLongCalls; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6e81bd2d349d..93495d66ae70 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -80,8 +80,7 @@ computeTargetABI(const Triple &TT, StringRef CPU, // FIXME: This is duplicated code from the front end and should be unified. 
if (TT.isOSBinFormatMachO()) { if (TT.getEnvironment() == llvm::Triple::EABI || - (TT.getOS() == llvm::Triple::UnknownOS && - TT.getObjectFormat() == llvm::Triple::MachO) || + (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || CPU.startswith("cortex-m")) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; } else { @@ -104,8 +103,8 @@ computeTargetABI(const Triple &TT, StringRef CPU, TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; break; default: - if (TT.getOS() == llvm::Triple::NetBSD) - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + if (TT.isOSNetBSD()) + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; else TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; break; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index f4901fc24e44..2f194cf7ae06 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -61,14 +61,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); if (Idx != -1) return LT.first * NEONFltDblTbl[Idx].Cost; } - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); @@ -282,8 +282,8 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } }; - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); + EVT SelCondTy = TLI->getValueType(DL, CondTy); + EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, SelCondTy.getSimpleVT(), @@ -292,7 +292,7 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return NEONVectorSelectTbl[Idx].Cost; } - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } @@ -353,7 +353,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) @@ -379,7 +379,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); int Idx = CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) @@ -395,7 +395,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( TTI::OperandValueProperties Opd2PropInfo) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); const unsigned FunctionCallDivCost = 20; const unsigned ReciprocalDivCost = 10; @@ -468,7 +468,7 @@ 
unsigned ARMTTIImpl::getArithmeticInstrCost( unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { @@ -488,12 +488,12 @@ unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, assert(isa<VectorType>(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index f2e5db655ccf..84f256f73722 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -42,7 +42,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { public: explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. ARMTTIImpl(const ARMTTIImpl &Arg) @@ -50,18 +51,6 @@ public: ARMTTIImpl(ARMTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - ARMTTIImpl &operator=(const ARMTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - ARMTTIImpl &operator=(ARMTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c2db74619871..f8f0eb2d4baa 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -189,9 +189,9 @@ class ARMAsmParser : public MCTargetAsmParser { return getParser().Error(L, Msg, Ranges); } - bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands, + bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo, bool IsARPop = false); - bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands, + bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo); int tryParseRegister(); @@ -242,6 +242,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); + void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting, + OperandVector &Operands); bool isThumb() const { // FIXME: Can tablegen auto-generate this? 
return STI.getFeatureBits()[ARM::ModeThumb]; @@ -5465,6 +5467,92 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, CanAcceptPredicationCode = true; } +// \brief Some Thumb instructions have two operand forms that are not +// available as three operand, convert to two operand form if possible. +// +// FIXME: We would really like to be able to tablegen'erate this. +void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic, + bool CarrySetting, + OperandVector &Operands) { + if (Operands.size() != 6) + return; + + const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]); + auto &Op4 = static_cast<ARMOperand &>(*Operands[4]); + if (!Op3.isReg() || !Op4.isReg()) + return; + + auto Op3Reg = Op3.getReg(); + auto Op4Reg = Op4.getReg(); + + // For most Thumb2 cases we just generate the 3 operand form and reduce + // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr) + // won't accept SP or PC so we do the transformation here taking care + // with immediate range in the 'add sp, sp #imm' case. + auto &Op5 = static_cast<ARMOperand &>(*Operands[5]); + if (isThumbTwo()) { + if (Mnemonic != "add") + return; + bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC || + (Op5.isReg() && Op5.getReg() == ARM::PC); + if (!TryTransform) { + TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP || + (Op5.isReg() && Op5.getReg() == ARM::SP)) && + !(Op3Reg == ARM::SP && Op4Reg == ARM::SP && + Op5.isImm() && !Op5.isImm0_508s4()); + } + if (!TryTransform) + return; + } else if (!isThumbOne()) + return; + + if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || + Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || + Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) + return; + + // If first 2 operands of a 3 operand instruction are the same + // then transform to 2 operand version of the same instruction + // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' + bool Transform = Op3Reg == Op4Reg; + + // For communtative operations, we might be able to transform if we swap + // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially + // as tADDrsp. + const ARMOperand *LastOp = &Op5; + bool Swap = false; + if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() && + ((Mnemonic == "add" && Op4Reg != ARM::SP) || + Mnemonic == "and" || Mnemonic == "eor" || + Mnemonic == "adc" || Mnemonic == "orr")) { + Swap = true; + LastOp = &Op4; + Transform = true; + } + + // If both registers are the same then remove one of them from + // the operand list, with certain exceptions. + if (Transform) { + // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the + // 2 operand forms don't exist. + if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") && + LastOp->isReg()) + Transform = false; + + // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into + // 3-bits because the ARMARM says not to. + if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7()) + Transform = false; + } + + if (Transform) { + if (Swap) + std::swap(Op4, Op5); + Operands.erase(Operands.begin() + 3); + } +} + bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands) { // FIXME: This is all horribly hacky. 
We really need a better way to deal @@ -5838,6 +5926,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, "VFP/Neon double precision register expected"); } + tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands); + // Some instructions, mostly Thumb, have forms for the same mnemonic that // do and don't have a cc_out optional-def operand. With some spot-checks // of the operand list, we can figure out which variant we're trying to @@ -5901,48 +5991,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - // If first 2 operands of a 3 operand instruction are the same - // then transform to 2 operand version of the same instruction - // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' - // FIXME: We would really like to be able to tablegen'erate this. - if (isThumbOne() && Operands.size() == 6 && - (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || - Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || - Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || - Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) { - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]); - ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]); - - // If both registers are the same then remove one of them from - // the operand list. - if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) { - // If 3rd operand (variable Op5) is a register and the instruction is adds/sub - // then do not transform as the backend already handles this instruction - // correctly. - if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) { - Operands.erase(Operands.begin() + 3); - if (Mnemonic == "add" && !CarrySetting) { - // Special case for 'add' (not 'adds') instruction must - // remove the CCOut operand as well. - Operands.erase(Operands.begin() + 1); - } - } - } - } - - // If instruction is 'add' and first two register operands - // use SP register, then remove one of the SP registers from - // the instruction. - // FIXME: We would really like to be able to tablegen'erate this. - if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) { - ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) { - Operands.erase(Operands.begin() + 2); - } - } - // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); @@ -5985,8 +6033,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // return 'true' if register list contains non-low GPR registers, // 'false' otherwise. If Reg is in the register list or is HiReg, set // 'containsReg' to true. -static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg, - unsigned HiReg, bool &containsReg) { +static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo, + unsigned Reg, unsigned HiReg, + bool &containsReg) { containsReg = false; for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) { unsigned OpReg = Inst.getOperand(i).getReg(); @@ -6001,8 +6050,8 @@ static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg, // Check if the specified regisgter is in the register list of the inst, // starting at the indicated operand number. 
-static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { - for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) { +static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) { + for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) { unsigned OpReg = Inst.getOperand(i).getReg(); if (OpReg == Reg) return true; @@ -6020,7 +6069,7 @@ static bool instIsBreakpoint(const MCInst &Inst) { } -bool ARMAsmParser::validatetLDMRegList(MCInst Inst, +bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo, bool IsARPop) { const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); @@ -6043,7 +6092,7 @@ bool ARMAsmParser::validatetLDMRegList(MCInst Inst, return false; } -bool ARMAsmParser::validatetSTMRegList(MCInst Inst, +bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo) { const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); @@ -8167,8 +8216,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. // Note that this is only for ADD, not SUB. This mirrors the system - // 'as' behaviour. Make sure the wide encoding wasn't explicit. - if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + // 'as' behaviour. Also take advantage of ADD being commutative. + // Make sure the wide encoding wasn't explicit. + bool Swap = false; + auto DestReg = Inst.getOperand(0).getReg(); + bool Transform = DestReg == Inst.getOperand(1).getReg(); + if (!Transform && DestReg == Inst.getOperand(2).getReg()) { + Transform = true; + Swap = true; + } + if (!Transform || Inst.getOperand(5).getReg() != 0 || (static_cast<ARMOperand &>(*Operands[3]).isToken() && static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) @@ -8177,7 +8234,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, TmpInst.setOpcode(ARM::tADDhirr); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); - TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(Swap ? 
1 : 2)); TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; @@ -9176,8 +9233,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } - STI.InitMCProcessorInfo(CPU, ""); - STI.InitCPUSchedModel(CPU); + STI.setDefaultFeatures(CPU); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 4d12bfb5d60f..d17fdb95dbdf 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1362,7 +1362,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new ARMTargetELFStreamer(S); return new ARMTargetStreamer(S); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index fafe25ae5be5..21c9fc1e58b2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" -static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] && (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) && @@ -63,7 +63,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) { @@ -75,7 +75,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && "cannot predicate thumb instructions"); @@ -92,7 +92,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && "cannot predicate thumb instructions"); @@ -257,9 +257,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT, ArchFS = FS; } - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitARMMCSubtargetInfo(X, TT, CPU, ArchFS); - return X; + return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS); } static MCInstrInfo *createARMMCInstrInfo() { @@ -268,7 +266,7 @@ static MCInstrInfo *createARMMCInstrInfo() { return X; } -static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { +static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); return X; @@ -279,10 +277,10 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) MAI = new 
ARMMCAsmInfoDarwin(TheTriple); - else if (TheTriple.isWindowsItaniumEnvironment()) - MAI = new ARMCOFFMCAsmInfoGNU(); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new ARMCOFFMCAsmInfoMicrosoft(); + else if (TheTriple.isOSWindows()) + MAI = new ARMCOFFMCAsmInfoGNU(); else MAI = new ARMELFMCAsmInfo(TheTriple); @@ -292,14 +290,13 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { - Triple TheTriple(TT); // Default relocation model on Darwin is PIC, not DynamicNoPIC. - RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; + RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; } X->initMCCodeGenInfo(RM, CM, OL); return X; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 77cd890e4cad..3b4358b5d9bf 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -365,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); diff --git a/lib/Target/BPF/BPFFrameLowering.cpp b/lib/Target/BPF/BPFFrameLowering.cpp index 54c5ececc7de..c2806c85f24f 100644 --- a/lib/Target/BPF/BPFFrameLowering.cpp +++ b/lib/Target/BPF/BPFFrameLowering.cpp @@ -29,12 +29,12 @@ void BPFFrameLowering::emitPrologue(MachineFunction &MF, void BPFFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {} -void BPFFrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - - MRI.setPhysRegUnused(BPF::R6); - MRI.setPhysRegUnused(BPF::R7); - MRI.setPhysRegUnused(BPF::R8); - MRI.setPhysRegUnused(BPF::R9); +void BPFFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + SavedRegs.reset(BPF::R6); + SavedRegs.reset(BPF::R7); + SavedRegs.reset(BPF::R8); + SavedRegs.reset(BPF::R9); } diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h index 3b9fc443e053..251cda965ff5 100644 --- a/lib/Target/BPF/BPFFrameLowering.h +++ b/lib/Target/BPF/BPFFrameLowering.h @@ -28,8 +28,8 @@ public: void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 38c56bbef81e..58498a1aec7d 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -302,8 +302,9 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 
DAG.getContext()->diagnose(Err); } + auto PtrVT = getPointerTy(MF.getDataLayout()); Chain = DAG.getCALLSEQ_START( - Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true), CLI.DL); + Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), CLI.DL); SmallVector<std::pair<unsigned, SDValue>, 5> RegsToPass; @@ -350,10 +351,10 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy(), + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT, G->getOffset(), 0); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), 0); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -374,8 +375,8 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END( - Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true), - DAG.getConstant(0, CLI.DL, getPointerTy(), true), InFlag, CLI.DL); + Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), + DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we diff --git a/lib/Target/BPF/BPFSubtarget.cpp b/lib/Target/BPF/BPFSubtarget.cpp index 65acd585116d..c3a8b1caa63d 100644 --- a/lib/Target/BPF/BPFSubtarget.cpp +++ b/lib/Target/BPF/BPFSubtarget.cpp @@ -28,4 +28,4 @@ void BPFSubtarget::anchor() {} BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : BPFGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this), - TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {} + TLInfo(TM, *this) {} diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 3e928fc93a37..840570ebc392 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -40,7 +40,7 @@ static MCInstrInfo *createBPFMCInstrInfo() { return X; } -static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createBPFMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitBPFMCRegisterInfo(X, BPF::R11 /* RAReg doesn't exist */); return X; @@ -48,12 +48,10 @@ static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitBPFMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createBPFMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createBPFMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index bc5d7f65b2f6..272688edb8a1 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -2148,8 +2148,8 @@ char CppWriter::ID = 0; bool 
CPPTargetMachine::addPassesToEmitFile( PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; auto FOut = llvm::make_unique<formatted_raw_ostream>(o); diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index ebf0635b12e4..00e402feffbc 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -31,7 +31,8 @@ struct CPPTargetMachine : public TargetMachine { public: bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, - AnalysisID StartAfter, AnalysisID StopAfter, + AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) override; }; diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp new file mode 100644 index 000000000000..cb7e633fb82f --- /dev/null +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -0,0 +1,1127 @@ +//===--- BitTracker.cpp ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// SSA-based bit propagation. +// +// The purpose of this code is, for a given virtual register, to provide +// information about the value of each bit in the register. The values +// of bits are represented by the class BitValue, and take one of four +// cases: 0, 1, "ref" and "bottom". The 0 and 1 are rather clear, the +// "ref" value means that the bit is a copy of another bit (which itself +// cannot be a copy of yet another bit---such chains are not allowed). +// A "ref" value is associated with a BitRef structure, which indicates +// which virtual register, and which bit in that register is the origin +// of the value. For example, given an instruction +// vreg2 = ASL vreg1, 1 +// assuming that nothing is known about bits of vreg1, bit 1 of vreg2 +// will be a "ref" to (vreg1, 0). If there is a subsequent instruction +// vreg3 = ASL vreg2, 2 +// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well. +// The "bottom" case means that the bit's value cannot be determined, +// and that this virtual register actually defines it. The "bottom" case +// is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref +// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to +// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc. +// +// The tracker implements the Wegman-Zadeck algorithm, originally developed +// for SSA-based constant propagation. Each register is represented as +// a sequence of bits, with the convention that bit 0 is the least signi- +// ficant bit. Each bit is propagated individually. The class RegisterCell +// implements the register's representation, and is also the subject of +// the lattice operations in the tracker. +// +// The intended usage of the bit tracker is to create a target-specific +// machine instruction evaluator, pass the evaluator to the BitTracker +// object, and run the tracker. 
The tracker will then collect the bit +// value information for a given machine function. After that, it can be +// queried for the cells for each virtual register. +// Sample code: +// const TargetSpecificEvaluator TSE(TRI, MRI); +// BitTracker BT(TSE, MF); +// BT.run(); +// ... +// unsigned Reg = interestingRegister(); +// RegisterCell RC = BT.get(Reg); +// if (RC[3].is(1)) +// Reg0bit3 = 1; +// +// The code below is intended to be fully target-independent. + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "BitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +namespace { + // Local trickery to pretty print a register (without the whole "%vreg" + // business). + struct printv { + printv(unsigned r) : R(r) {} + unsigned R; + }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { + if (PV.R) + OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); + else + OS << 's'; + return OS; + } +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) { + switch (BV.Type) { + case BT::BitValue::Top: + OS << 'T'; + break; + case BT::BitValue::Zero: + OS << '0'; + break; + case BT::BitValue::One: + OS << '1'; + break; + case BT::BitValue::Ref: + OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']'; + break; + } + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) { + unsigned n = RC.Bits.size(); + OS << "{ w:" << n; + // Instead of printing each bit value individually, try to group them + // into logical segments, such as sequences of 0 or 1 bits or references + // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz"). + // "Start" will be the index of the beginning of the most recent segment. + unsigned Start = 0; + bool SeqRef = false; // A sequence of refs to consecutive bits. + bool ConstRef = false; // A sequence of refs to the same bit. + + for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) { + const BT::BitValue &V = RC[i]; + const BT::BitValue &SV = RC[Start]; + bool IsRef = (V.Type == BT::BitValue::Ref); + // If the current value is the same as Start, skip to the next one. + if (!IsRef && V == SV) + continue; + if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) { + if (Start+1 == i) { + SeqRef = (V.RefI.Pos == SV.RefI.Pos+1); + ConstRef = (V.RefI.Pos == SV.RefI.Pos); + } + if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start)) + continue; + if (ConstRef && V.RefI.Pos == SV.RefI.Pos) + continue; + } + + // The current value is different. Print the previous one and reset + // the Start. 
+ OS << " [" << Start; + unsigned Count = i - Start; + if (Count == 1) { + OS << "]:" << SV; + } else { + OS << '-' << i-1 << "]:"; + if (SV.Type == BT::BitValue::Ref && SeqRef) + OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' + << SV.RefI.Pos+(Count-1) << ']'; + else + OS << SV; + } + Start = i; + SeqRef = ConstRef = false; + } + + OS << " [" << Start; + unsigned Count = n - Start; + if (n-Start == 1) { + OS << "]:" << RC[Start]; + } else { + OS << '-' << n-1 << "]:"; + const BT::BitValue &SV = RC[Start]; + if (SV.Type == BT::BitValue::Ref && SeqRef) + OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' + << SV.RefI.Pos+(Count-1) << ']'; + else + OS << SV; + } + OS << " }"; + + return OS; +} + +BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) + : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} + +BitTracker::~BitTracker() { + delete ⤅ +} + + +// If we were allowed to update a cell for a part of a register, the meet +// operation would need to be parametrized by the register number and the +// exact part of the register, so that the computer BitRefs correspond to +// the actual bits of the "self" register. +// While this cannot happen in the current implementation, I'm not sure +// if this should be ruled out in the future. +bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { + // An example when "meet" can be invoked with SelfR == 0 is a phi node + // with a physical register as an operand. + assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR)); + bool Changed = false; + for (uint16_t i = 0, n = Bits.size(); i < n; ++i) { + const BitValue &RCV = RC[i]; + Changed |= Bits[i].meet(RCV, BitRef(SelfR, i)); + } + return Changed; +} + + +// Insert the entire cell RC into the current cell at position given by M. +BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, + const BitMask &M) { + uint16_t B = M.first(), E = M.last(), W = width(); + // Sanity: M must be a valid mask for *this. + assert(B < W && E < W); + // Sanity: the masked part of *this must have the same number of bits + // as the source. + assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|. + assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|. + if (B <= E) { + for (uint16_t i = 0; i <= E-B; ++i) + Bits[i+B] = RC[i]; + } else { + for (uint16_t i = 0; i < W-B; ++i) + Bits[i+B] = RC[i]; + for (uint16_t i = 0; i <= E; ++i) + Bits[i] = RC[i+(W-B)]; + } + return *this; +} + + +BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { + uint16_t B = M.first(), E = M.last(), W = width(); + assert(B < W && E < W); + if (B <= E) { + RegisterCell RC(E-B+1); + for (uint16_t i = B; i <= E; ++i) + RC.Bits[i-B] = Bits[i]; + return RC; + } + + RegisterCell RC(E+(W-B)+1); + for (uint16_t i = 0; i < W-B; ++i) + RC.Bits[i] = Bits[i+B]; + for (uint16_t i = 0; i <= E; ++i) + RC.Bits[i+(W-B)] = Bits[i]; + return RC; +} + + +BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { + // Rotate left (i.e. towards increasing bit indices). + // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] + uint16_t W = width(); + Sh = Sh % W; + if (Sh == 0) + return *this; + + RegisterCell Tmp(W-Sh); + // Tmp = [0..W-Sh-1]. + for (uint16_t i = 0; i < W-Sh; ++i) + Tmp[i] = Bits[i]; + // Shift [W-Sh..W-1] to [0..Sh-1]. + for (uint16_t i = 0; i < Sh; ++i) + Bits[i] = Bits[W-Sh+i]; + // Copy Tmp to [Sh..W-1]. 
+ for (uint16_t i = 0; i < W-Sh; ++i) + Bits[i+Sh] = Tmp.Bits[i]; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, + const BitValue &V) { + assert(B <= E); + while (B < E) + Bits[B++] = V; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { + // Append the cell given as the argument to the "this" cell. + // Bit 0 of RC becomes bit W of the result, where W is this->width(). + uint16_t W = width(), WRC = RC.width(); + Bits.resize(W+WRC); + for (uint16_t i = 0; i < WRC; ++i) + Bits[i+W] = RC.Bits[i]; + return *this; +} + + +uint16_t BT::RegisterCell::ct(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[C] == V) + C++; + return C; +} + + +uint16_t BT::RegisterCell::cl(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[W-(C+1)] == V) + C++; + return C; +} + + +bool BT::RegisterCell::operator== (const RegisterCell &RC) const { + uint16_t W = Bits.size(); + if (RC.Bits.size() != W) + return false; + for (uint16_t i = 0; i < W; ++i) + if (Bits[i] != RC[i]) + return false; + return true; +} + + +uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { + // The general problem is with finding a register class that corresponds + // to a given reference reg:sub. There can be several such classes, and + // since we only care about the register size, it does not matter which + // such class we would find. + // The easiest way to accomplish what we want is to + // 1. find a physical register PhysR from the same class as RR.Reg, + // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub, + // 3. find a register class that contains PhysS. + unsigned PhysR; + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + const TargetRegisterClass *VC = MRI.getRegClass(RR.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); + PhysR = RR.Reg; + } + + unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); + uint16_t BW = RC->getSize()*8; + return BW; +} + + +BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, + const CellMapType &M) const { + uint16_t BW = getRegBitWidth(RR); + + // Physical registers are assumed to be present in the map with an unknown + // value. Don't actually insert anything in the map, just return the cell. + if (TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + return RegisterCell::self(0, BW); + + assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + // For virtual registers that belong to a class that is not tracked, + // generate an "unknown" value as well. + const TargetRegisterClass *C = MRI.getRegClass(RR.Reg); + if (!track(C)) + return RegisterCell::self(0, BW); + + CellMapType::const_iterator F = M.find(RR.Reg); + if (F != M.end()) { + if (!RR.Sub) + return F->second; + BitMask M = mask(RR.Reg, RR.Sub); + return F->second.extract(M); + } + // If not found, create a "top" entry, but do not insert it in the map. + return RegisterCell::top(BW); +} + + +void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, + CellMapType &M) const { + // While updating the cell map can be done in a meaningful way for + // a part of a register, it makes little sense to implement it as the + // SSA representation would never contain such "partial definitions". 
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return; + assert(RR.Sub == 0 && "Unexpected sub-register in definition"); + // Eliminate all ref-to-reg-0 bit values: replace them with "self". + for (unsigned i = 0, n = RC.width(); i < n; ++i) { + const BitValue &V = RC[i]; + if (V.Type == BitValue::Ref && V.RefI.Reg == 0) + RC[i].RefI = BitRef(RR.Reg, i); + } + M[RR.Reg] = RC; +} + + +// Check if the cell represents a compile-time integer value. +bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) + if (!A[i].is(0) && !A[i].is(1)) + return false; + return true; +} + + +// Convert a cell to the integer value. The result must fit in uint64_t. +uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { + assert(isInt(A)); + uint64_t Val = 0; + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) { + Val <<= 1; + Val |= A[i].is(1); + } + return Val; +} + + +// Evaluator helper functions. These implement some common operation on +// register cells that can be used to implement target-specific instructions +// in a target-specific evaluator. + +BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { + RegisterCell Res(W); + // For bits beyond the 63rd, this will generate the sign bit of V. + for (uint16_t i = 0; i < W; ++i) { + Res[i] = BitValue(V & 1); + V >>= 1; + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { + APInt A = CI->getValue(); + uint16_t BW = A.getBitWidth(); + assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow"); + RegisterCell Res(BW); + for (uint16_t i = 0; i < BW; ++i) + Res[i] = A[i]; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Carry = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) + bool(V2) + Carry; + Res[I] = BitValue(S & 1); + Carry = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + // If the next bit is same as Carry, the result will be 0 plus the + // other bit. The Carry bit will remain unchanged. 
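+ // For instance, with Carry == 0, adding a known-zero bit to an unknown
+ // bit X yields a ref to X and the carry stays 0; once neither input bit
+ // matches Carry, the loop breaks and the remaining result bits are set
+ // to "self" (unknown) below.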
+ if (V1.is(Carry)) + Res[I] = BitValue::ref(V2); + else if (V2.is(Carry)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Borrow = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) - bool(V2) - Borrow; + Res[I] = BitValue(S & 1); + Borrow = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (V1.is(Borrow)) { + Res[I] = BitValue::ref(V2); + break; + } + if (V2.is(Borrow)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, + uint16_t Sh) const { + assert(Sh <= A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(Sh); + Res.fill(0, Sh, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(W-Sh); + Res.fill(W-Sh, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[W-1]; + Res.rol(W-Sh); + Res.fill(W-Sh, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1)) + Res[i] = BitValue::ref(V2); + else if (V2.is(1)) + Res[i] = BitValue::ref(V1); + else if (V1.is(0) || V2.is(0)) + Res[i] = BitValue::Zero; + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1) || V2.is(1)) + Res[i] = BitValue::One; + else if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; 
++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { + uint16_t W = A1.width(); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V = A1[i]; + if (V.is(0)) + Res[i] = BitValue::One; + else if (V.is(1)) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::One; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::Zero; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.cl(B), AW = A1.width(); + // If the last leading non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[AW-1-C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.ct(B), AW = A1.width(); + // If the last trailing non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[FromN-1]; + // Sign-extend "inreg". + Res.fill(FromN, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.fill(FromN, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, + uint16_t B, uint16_t E) const { + uint16_t W = A1.width(); + assert(B < W && E <= W); + if (B == E) + return RegisterCell(0); + uint16_t Last = (E > 0) ? E-1 : W-1; + RegisterCell Res = RegisterCell::ref(A1).extract(BT::BitMask(B, Last)); + // Return shorter cell. + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, + const RegisterCell &A2, uint16_t AtN) const { + uint16_t W1 = A1.width(), W2 = A2.width(); + (void)W1; + assert(AtN < W1 && AtN+W2 <= W1); + // Copy bits from A1, insert A2 at position AtN. 
+ RegisterCell Res = RegisterCell::ref(A1); + if (W2 > 0) + Res.insert(RegisterCell::ref(A2), BT::BitMask(AtN, AtN+W2-1)); + return Res; +} + + +BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { + assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); + uint16_t W = getRegBitWidth(Reg); + assert(W > 0 && "Cannot generate mask for empty register"); + return BitMask(0, W-1); +} + + +bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::REG_SEQUENCE: { + RegisterRef RD = MI->getOperand(0); + assert(RD.Sub == 0); + RegisterRef RS = MI->getOperand(1); + unsigned SS = MI->getOperand(2).getImm(); + RegisterRef RT = MI->getOperand(3); + unsigned ST = MI->getOperand(4).getImm(); + assert(SS != ST); + + uint16_t W = getRegBitWidth(RD); + RegisterCell Res(W); + Res.insert(RegisterCell::ref(getCell(RS, Inputs)), mask(RD.Reg, SS)); + Res.insert(RegisterCell::ref(getCell(RT, Inputs)), mask(RD.Reg, ST)); + putCell(RD, Res, Outputs); + break; + } + + case TargetOpcode::COPY: { + // COPY can transfer a smaller register into a wider one. + // If that is the case, fill the remaining high bits with 0. + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + uint16_t WD = getRegBitWidth(RD); + uint16_t WS = getRegBitWidth(RS); + assert(WD >= WS); + RegisterCell Src = getCell(RS, Inputs); + RegisterCell Res(WD); + Res.insert(Src, BitMask(0, WS-1)); + Res.fill(WS, WD, BitValue::Zero); + putCell(RD, Res, Outputs); + break; + } + + default: + return false; + } + + return true; +} + + +// Main W-Z implementation. + +void BT::visitPHI(const MachineInstr *PI) { + int ThisN = PI->getParent()->getNumber(); + if (Trace) + dbgs() << "Visit FI(BB#" << ThisN << "): " << *PI; + + const MachineOperand &MD = PI->getOperand(0); + assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition"); + RegisterRef DefRR(MD); + uint16_t DefBW = ME.getRegBitWidth(DefRR); + + RegisterCell DefC = ME.getCell(DefRR, Map); + if (DefC == RegisterCell::self(DefRR.Reg, DefBW)) // XXX slow + return; + + bool Changed = false; + + for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) { + const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB(); + int PredN = PB->getNumber(); + if (Trace) + dbgs() << " edge BB#" << PredN << "->BB#" << ThisN; + if (!EdgeExec.count(CFGEdge(PredN, ThisN))) { + if (Trace) + dbgs() << " not executable\n"; + continue; + } + + RegisterRef RU = PI->getOperand(i); + RegisterCell ResC = ME.getCell(RU, Map); + if (Trace) + dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ResC << "\n"; + Changed |= DefC.meet(ResC, DefRR.Reg); + } + + if (Changed) { + if (Trace) + dbgs() << "Output: " << PrintReg(DefRR.Reg, &ME.TRI, DefRR.Sub) + << " cell: " << DefC << "\n"; + ME.putCell(DefRR, DefC, Map); + visitUsesOf(DefRR.Reg); + } +} + + +void BT::visitNonBranch(const MachineInstr *MI) { + if (Trace) { + int ThisN = MI->getParent()->getNumber(); + dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI; + } + if (MI->isDebugValue()) + return; + assert(!MI->isBranch() && "Unexpected branch instruction"); + + CellMapType ResMap; + bool Eval = ME.evaluate(MI, Map, ResMap); + + if (Trace && Eval) { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + RegisterRef RU(MO); + dbgs() << 
" input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ME.getCell(RU, Map) << "\n"; + } + dbgs() << "Outputs:\n"; + for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end(); + I != E; ++I) { + RegisterRef RD(I->first); + dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: " + << ME.getCell(RD, ResMap) << "\n"; + } + } + + // Iterate over all definitions of the instruction, and update the + // cells accordingly. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Visit register defs only. + if (!MO.isReg() || !MO.isDef()) + continue; + RegisterRef RD(MO); + assert(RD.Sub == 0 && "Unexpected sub-register in definition"); + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg)) + continue; + + bool Changed = false; + if (!Eval || !ResMap.has(RD.Reg)) { + // Set to "ref" (aka "bottom"). + uint16_t DefBW = ME.getRegBitWidth(RD); + RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); + if (RefC != ME.getCell(RD, Map)) { + ME.putCell(RD, RefC, Map); + Changed = true; + } + } else { + RegisterCell DefC = ME.getCell(RD, Map); + RegisterCell ResC = ME.getCell(RD, ResMap); + // This is a non-phi instruction, so the values of the inputs come + // from the same registers each time this instruction is evaluated. + // During the propagation, the values of the inputs can become lowered + // in the sense of the lattice operation, which may cause different + // results to be calculated in subsequent evaluations. This should + // not cause the bottoming of the result in the map, since the new + // result is already reflecting the lowered inputs. + for (uint16_t i = 0, w = DefC.width(); i < w; ++i) { + BitValue &V = DefC[i]; + // Bits that are already "bottom" should not be updated. + if (V.Type == BitValue::Ref && V.RefI.Reg == RD.Reg) + continue; + // Same for those that are identical in DefC and ResC. + if (V == ResC[i]) + continue; + V = ResC[i]; + Changed = true; + } + if (Changed) + ME.putCell(RD, DefC, Map); + } + if (Changed) + visitUsesOf(RD.Reg); + } +} + + +void BT::visitBranchesFrom(const MachineInstr *BI) { + const MachineBasicBlock &B = *BI->getParent(); + MachineBasicBlock::const_iterator It = BI, End = B.end(); + BranchTargetList Targets, BTs; + bool FallsThrough = true, DefaultToAll = false; + int ThisN = B.getNumber(); + + do { + BTs.clear(); + const MachineInstr *MI = &*It; + if (Trace) + dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI; + assert(MI->isBranch() && "Expecting branch instruction"); + InstrExec.insert(MI); + bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough); + if (!Eval) { + // If the evaluation failed, we will add all targets. Keep going in + // the loop to mark all executable branches as such. + DefaultToAll = true; + FallsThrough = true; + if (Trace) + dbgs() << " failed to evaluate: will add all CFG successors\n"; + } else if (!DefaultToAll) { + // If evaluated successfully add the targets to the cumulative list. + if (Trace) { + dbgs() << " adding targets:"; + for (unsigned i = 0, n = BTs.size(); i < n; ++i) + dbgs() << " BB#" << BTs[i]->getNumber(); + if (FallsThrough) + dbgs() << "\n falls through\n"; + else + dbgs() << "\n does not fall through\n"; + } + Targets.insert(BTs.begin(), BTs.end()); + } + ++It; + } while (FallsThrough && It != End); + + typedef MachineBasicBlock::const_succ_iterator succ_iterator; + if (!DefaultToAll) { + // Need to add all CFG successors that lead to EH landing pads. 
+ // There won't be explicit branches to these blocks, but they must + // be processed. + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { + const MachineBasicBlock *SB = *I; + if (SB->isLandingPad()) + Targets.insert(SB); + } + if (FallsThrough) { + MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end()) + Targets.insert(&*Next); + } + } else { + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) + Targets.insert(*I); + } + + for (unsigned i = 0, n = Targets.size(); i < n; ++i) { + int TargetN = Targets[i]->getNumber(); + FlowQ.push(CFGEdge(ThisN, TargetN)); + } +} + + +void BT::visitUsesOf(unsigned Reg) { + if (Trace) + dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; + + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + use_iterator End = MRI.use_nodbg_end(); + for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) { + MachineInstr *UseI = I->getParent(); + if (!InstrExec.count(UseI)) + continue; + if (UseI->isPHI()) + visitPHI(UseI); + else if (!UseI->isBranch()) + visitNonBranch(UseI); + else + visitBranchesFrom(UseI); + } +} + + +BT::RegisterCell BT::get(RegisterRef RR) const { + return ME.getCell(RR, Map); +} + + +void BT::put(RegisterRef RR, const RegisterCell &RC) { + ME.putCell(RR, RC, Map); +} + + +// Replace all references to bits from OldRR with the corresponding bits +// in NewRR. +void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { + assert(Map.has(OldRR.Reg) && "OldRR not present in map"); + BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); + BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); + uint16_t OMB = OM.first(), OME = OM.last(); + uint16_t NMB = NM.first(), NME = NM.last(); + (void)NME; + assert((OME-OMB == NME-NMB) && + "Substituting registers of different lengths"); + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + RegisterCell &RC = I->second; + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + BitValue &V = RC[i]; + if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg) + continue; + if (V.RefI.Pos < OMB || V.RefI.Pos > OME) + continue; + V.RefI.Reg = NewRR.Reg; + V.RefI.Pos += NMB-OMB; + } + } +} + + +// Check if the block has been "executed" during propagation. (If not, the +// block is dead, but it may still appear to be reachable.) +bool BT::reached(const MachineBasicBlock *B) const { + int BN = B->getNumber(); + assert(BN >= 0); + for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end(); + I != E; ++I) { + if (I->second == BN) + return true; + } + return false; +} + + +void BT::reset() { + EdgeExec.clear(); + InstrExec.clear(); + Map.clear(); +} + + +void BT::run() { + reset(); + assert(FlowQ.empty()); + + typedef GraphTraits<const MachineFunction*> MachineFlowGraphTraits; + const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); + + unsigned MaxBN = 0; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + assert(I->getNumber() >= 0 && "Disconnected block"); + unsigned BN = I->getNumber(); + if (BN > MaxBN) + MaxBN = BN; + } + + // Keep track of visited blocks. + BitVector BlockScanned(MaxBN+1); + + int EntryN = Entry->getNumber(); + // Generate a fake edge to get something to start with. 
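+ // (The pseudo-predecessor -1 is not a real block number; it only makes
+ // the entry block appear reachable so that propagation can begin.)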
+ FlowQ.push(CFGEdge(-1, EntryN)); + + while (!FlowQ.empty()) { + CFGEdge Edge = FlowQ.front(); + FlowQ.pop(); + + if (EdgeExec.count(Edge)) + continue; + EdgeExec.insert(Edge); + + const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); + MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); + // Visit PHI nodes first. + while (It != End && It->isPHI()) { + const MachineInstr *PI = &*It++; + InstrExec.insert(PI); + visitPHI(PI); + } + + // If this block has already been visited through a flow graph edge, + // then the instructions have already been processed. Any updates to + // the cells would now only happen through visitUsesOf... + if (BlockScanned[Edge.second]) + continue; + BlockScanned[Edge.second] = true; + + // Visit non-branch instructions. + while (It != End && !It->isBranch()) { + const MachineInstr *MI = &*It++; + InstrExec.insert(MI); + visitNonBranch(MI); + } + // If block end has been reached, add the fall-through edge to the queue. + if (It == End) { + MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end()) { + int ThisN = B.getNumber(); + int NextN = Next->getNumber(); + FlowQ.push(CFGEdge(ThisN, NextN)); + } + } else { + // Handle the remaining sequence of branches. This function will update + // the work queue. + visitBranchesFrom(It); + } + } // while (!FlowQ->empty()) + + if (Trace) { + dbgs() << "Cells after propagation:\n"; + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) + dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; + } +} + diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h new file mode 100644 index 000000000000..ed002a794d66 --- /dev/null +++ b/lib/Target/Hexagon/BitTracker.h @@ -0,0 +1,449 @@ +//===--- BitTracker.h -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITTRACKER_H
+#define BITTRACKER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <map>
+#include <queue>
+#include <set>
+
+namespace llvm {
+ class ConstantInt;
+ class MachineRegisterInfo;
+ class MachineBasicBlock;
+ class MachineInstr;
+ class MachineOperand;
+ class raw_ostream;
+
+struct BitTracker {
+ struct BitRef;
+ struct RegisterRef;
+ struct BitValue;
+ struct BitMask;
+ struct RegisterCell;
+ struct MachineEvaluator;
+
+ typedef SetVector<const MachineBasicBlock *> BranchTargetList;
+
+ struct CellMapType : public std::map<unsigned,RegisterCell> {
+ bool has(unsigned Reg) const;
+ };
+
+ BitTracker(const MachineEvaluator &E, MachineFunction &F);
+ ~BitTracker();
+
+ void run();
+ void trace(bool On = false) { Trace = On; }
+ bool has(unsigned Reg) const;
+ const RegisterCell &lookup(unsigned Reg) const;
+ RegisterCell get(RegisterRef RR) const;
+ void put(RegisterRef RR, const RegisterCell &RC);
+ void subst(RegisterRef OldRR, RegisterRef NewRR);
+ bool reached(const MachineBasicBlock *B) const;
+
+private:
+ void visitPHI(const MachineInstr *PI);
+ void visitNonBranch(const MachineInstr *MI);
+ void visitBranchesFrom(const MachineInstr *BI);
+ void visitUsesOf(unsigned Reg);
+ void reset();
+
+ typedef std::pair<int,int> CFGEdge;
+ typedef std::set<CFGEdge> EdgeSetType;
+ typedef std::set<const MachineInstr *> InstrSetType;
+ typedef std::queue<CFGEdge> EdgeQueueType;
+
+ EdgeSetType EdgeExec; // Executable flow graph edges.
+ InstrSetType InstrExec; // Executable instructions.
+ EdgeQueueType FlowQ; // Work queue of CFG edges.
+ bool Trace; // Enable tracing for debugging.
+
+ const MachineEvaluator &ME;
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ CellMapType &Map;
+};
+
+
+// Abstraction of a reference to bit at position Pos from a register Reg.
+struct BitTracker::BitRef {
+ BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+ BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {}
+ bool operator== (const BitRef &BR) const {
+ // If Reg is 0, disregard Pos.
+ return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
+ }
+ unsigned Reg;
+ uint16_t Pos;
+};
+
+
+// Abstraction of a register reference in MachineOperand. It contains the
+// register number and the subregister index.
+struct BitTracker::RegisterRef {
+ RegisterRef(unsigned R = 0, unsigned S = 0)
+ : Reg(R), Sub(S) {}
+ RegisterRef(const MachineOperand &MO)
+ : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+ unsigned Reg, Sub;
+};
+
+
+// Value that a single bit can take. This is outside of the context of
+// any register, it is more of an abstraction of the two-element set of
+// possible bit values. One extension here is the "Ref" type, which
+// indicates that this bit takes the same value as the bit described by
+// RefInfo.
+struct BitTracker::BitValue {
+ enum ValueType {
+ Top, // Bit not yet defined.
+ Zero, // Bit = 0.
+ One, // Bit = 1.
+ Ref // Bit value same as the one described in RefI.
+ // Conceptually, there is no explicit "bottom" value: the lattice's
+ // bottom will be expressed as a "ref to itself", which, in the context
+ // of registers, could be read as "this value of this bit is defined by
+ // this bit".
+ // The ordering is:
+ // x <= Top,
+ // Self <= x, where "Self" is "ref to itself".
+ // This makes the value lattice different for each virtual register + // (even for each bit in the same virtual register), since the "bottom" + // for one register will be a simple "ref" for another register. + // Since we do not store the "Self" bit and register number, the meet + // operation will need to take it as a parameter. + // + // In practice there is a special case for values that are not associa- + // ted with any specific virtual register. An example would be a value + // corresponding to a bit of a physical register, or an intermediate + // value obtained in some computation (such as instruction evaluation). + // Such cases are identical to the usual Ref type, but the register + // number is 0. In such case the Pos field of the reference is ignored. + // + // What is worthy of notice is that in value V (that is a "ref"), as long + // as the RefI.Reg is not 0, it may actually be the same register as the + // one in which V will be contained. If the RefI.Pos refers to the posi- + // tion of V, then V is assumed to be "bottom" (as a "ref to itself"), + // otherwise V is taken to be identical to the referenced bit of the + // same register. + // If RefI.Reg is 0, however, such a reference to the same register is + // not possible. Any value V that is a "ref", and whose RefI.Reg is 0 + // is treated as "bottom". + }; + ValueType Type; + BitRef RefI; + + BitValue(ValueType T = Top) : Type(T) {} + BitValue(bool B) : Type(B ? One : Zero) {} + BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {} + BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} + + bool operator== (const BitValue &V) const { + if (Type != V.Type) + return false; + if (Type == Ref && !(RefI == V.RefI)) + return false; + return true; + } + bool operator!= (const BitValue &V) const { + return !operator==(V); + } + bool is(unsigned T) const { + assert(T == 0 || T == 1); + return T == 0 ? Type == Zero + : (T == 1 ? Type == One : false); + } + + // The "meet" operation is the "." operation in a semilattice (L, ., T, B): + // (1) x.x = x + // (2) x.y = y.x + // (3) x.(y.z) = (x.y).z + // (4) x.T = x (i.e. T = "top") + // (5) x.B = B (i.e. B = "bottom") + // + // This "meet" function will update the value of the "*this" object with + // the newly calculated one, and return "true" if the value of *this has + // changed, and "false" otherwise. + // To prove that it satisfies the conditions (1)-(5), it is sufficient + // to show that a relation + // x <= y <=> x.y = x + // defines a partial order (i.e. that "meet" is same as "infimum"). + bool meet(const BitValue &V, const BitRef &Self) { + // First, check the cases where there is nothing to be done. + if (Type == Ref && RefI == Self) // Bottom.meet(V) = Bottom (i.e. This) + return false; + if (V.Type == Top) // This.meet(Top) = This + return false; + if (*this == V) // This.meet(This) = This + return false; + + // At this point, we know that the value of "this" will change. + // If it is Top, it will become the same as V, otherwise it will + // become "bottom" (i.e. Self). + if (Type == Top) { + Type = V.Type; + RefI = V.RefI; // This may be irrelevant, but copy anyway. + return true; + } + // Become "bottom". + Type = Ref; + RefI = Self; + return true; + } + + // Create a reference to the bit value V. + static BitValue ref(const BitValue &V); + // Create a "self". 
+ static BitValue self(const BitRef &Self = BitRef()); + + bool num() const { + return Type == Zero || Type == One; + } + operator bool() const { + assert(Type == Zero || Type == One); + return Type == One; + } + + friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); +}; + + +// This operation must be idempotent, i.e. ref(ref(V)) == ref(V). +inline BitTracker::BitValue +BitTracker::BitValue::ref(const BitValue &V) { + if (V.Type != Ref) + return BitValue(V.Type); + if (V.RefI.Reg != 0) + return BitValue(V.RefI.Reg, V.RefI.Pos); + return self(); +} + + +inline BitTracker::BitValue +BitTracker::BitValue::self(const BitRef &Self) { + return BitValue(Self.Reg, Self.Pos); +} + + +// A sequence of bits starting from index B up to and including index E. +// If E < B, the mask represents two sections: [0..E] and [B..W) where +// W is the width of the register. +struct BitTracker::BitMask { + BitMask() : B(0), E(0) {} + BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } + uint16_t last() const { return E; } +private: + uint16_t B, E; +}; + + +// Representation of a register: a list of BitValues. +struct BitTracker::RegisterCell { + RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {} + + uint16_t width() const { + return Bits.size(); + } + const BitValue &operator[](uint16_t BitN) const { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + BitValue &operator[](uint16_t BitN) { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + + bool meet(const RegisterCell &RC, unsigned SelfR); + RegisterCell &insert(const RegisterCell &RC, const BitMask &M); + RegisterCell extract(const BitMask &M) const; // Returns a new cell. + RegisterCell &rol(uint16_t Sh); // Rotate left. + RegisterCell &fill(uint16_t B, uint16_t E, const BitValue &V); + RegisterCell &cat(const RegisterCell &RC); // Concatenate. + uint16_t cl(bool B) const; + uint16_t ct(bool B) const; + + bool operator== (const RegisterCell &RC) const; + bool operator!= (const RegisterCell &RC) const { + return !operator==(RC); + } + + const RegisterCell &operator=(const RegisterCell &RC) { + Bits = RC.Bits; + return *this; + } + + // Generate a "ref" cell for the corresponding register. In the resulting + // cell each bit will be described as being the same as the corresponding + // bit in register Reg (i.e. the cell is "defined" by register Reg). + static RegisterCell self(unsigned Reg, uint16_t Width); + // Generate a "top" cell of given size. + static RegisterCell top(uint16_t Width); + // Generate a cell that is a "ref" to another cell. + static RegisterCell ref(const RegisterCell &C); + +private: + // The DefaultBitN is here only to avoid frequent reallocation of the + // memory in the vector. 
+ static const unsigned DefaultBitN = 32; + typedef SmallVector<BitValue, DefaultBitN> BitValueList; + BitValueList Bits; + + friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); +}; + + +inline bool BitTracker::has(unsigned Reg) const { + return Map.find(Reg) != Map.end(); +} + + +inline const BitTracker::RegisterCell& +BitTracker::lookup(unsigned Reg) const { + CellMapType::const_iterator F = Map.find(Reg); + assert(F != Map.end()); + return F->second; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue::self(BitRef(Reg, i)); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::top(uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue(BitValue::Top); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::ref(const RegisterCell &C) { + uint16_t W = C.width(); + RegisterCell RC(W); + for (unsigned i = 0; i < W; ++i) + RC[i] = BitValue::ref(C[i]); + return RC; +} + + +inline bool BitTracker::CellMapType::has(unsigned Reg) const { + return find(Reg) != end(); +} + +// A class to evaluate target's instructions and update the cell maps. +// This is used internally by the bit tracker. A target that wants to +// utilize this should implement the evaluation functions (noted below) +// in a subclass of this class. +struct BitTracker::MachineEvaluator { + MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) + : TRI(T), MRI(M) {} + virtual ~MachineEvaluator() {} + + uint16_t getRegBitWidth(const RegisterRef &RR) const; + + RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; + void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not + // the cells directly. This function is a convenience wrapper to quickly + // generate a ref for a cell corresponding to a register reference. + RegisterCell getRef(const RegisterRef &RR, const CellMapType &M) const { + RegisterCell RC = getCell(RR, M); + return RegisterCell::ref(RC); + } + + // Helper functions. + // Check if a cell is an immediate value (i.e. all bits are either 0 or 1). + bool isInt(const RegisterCell &A) const; + // Convert cell to an immediate value. + uint64_t toInt(const RegisterCell &A) const; + + // Generate cell from an immediate value. + RegisterCell eIMM(int64_t V, uint16_t W) const; + RegisterCell eIMM(const ConstantInt *CI) const; + + // Arithmetic. + RegisterCell eADD(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eSUB(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLS(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLU(const RegisterCell &A1, const RegisterCell &A2) const; + + // Shifts. + RegisterCell eASL(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eLSR(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eASR(const RegisterCell &A1, uint16_t Sh) const; + + // Logical. + RegisterCell eAND(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eORL(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eXOR(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eNOT(const RegisterCell &A1) const; + + // Set bit, clear bit. 
+ RegisterCell eSET(const RegisterCell &A1, uint16_t BitN) const; + RegisterCell eCLR(const RegisterCell &A1, uint16_t BitN) const; + + // Count leading/trailing bits (zeros/ones). + RegisterCell eCLB(const RegisterCell &A1, bool B, uint16_t W) const; + RegisterCell eCTB(const RegisterCell &A1, bool B, uint16_t W) const; + + // Sign/zero extension. + RegisterCell eSXT(const RegisterCell &A1, uint16_t FromN) const; + RegisterCell eZXT(const RegisterCell &A1, uint16_t FromN) const; + + // Extract/insert + // XTR R,b,e: extract bits from A1 starting at bit b, ending at e-1. + // INS R,S,b: take R and replace bits starting from b with S. + RegisterCell eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const; + RegisterCell eINS(const RegisterCell &A1, const RegisterCell &A2, + uint16_t AtN) const; + + // User-provided functions for individual targets: + + // Return a sub-register mask that indicates which bits in Reg belong + // to the subregister Sub. These bits are assumed to be contiguous in + // the super-register, and have the same ordering in the sub-register + // as in the super-register. It is valid to call this function with + // Sub == 0, in this case, the function should return a mask that spans + // the entire register Reg (which is what the default implementation + // does). + virtual BitMask mask(unsigned Reg, unsigned Sub) const; + // Indicate whether a given register class should be tracked. + virtual bool track(const TargetRegisterClass *RC) const { return true; } + // Evaluate a non-branching machine instruction, given the cell map with + // the input values. Place the results in the Outputs map. Return "true" + // if evaluation succeeded, "false" otherwise. + virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + // Evaluate a branch, given the cell map with the input values. Fill out + // a list of all possible branch targets and indicate (through a flag) + // whether the branch could fall-through. Return "true" if this information + // has been successfully computed, "false" otherwise. + virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const = 0; + + const TargetRegisterInfo &TRI; + MachineRegisterInfo &MRI; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 758ccc741007..7ab2f0ba01df 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -12,13 +12,19 @@ tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen + BitTracker.cpp HexagonAsmPrinter.cpp + HexagonBitTracker.cpp HexagonCFGOptimizer.cpp + HexagonCommonGEP.cpp HexagonCopyToCombine.cpp HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp + HexagonGenExtract.cpp + HexagonGenInsert.cpp + HexagonGenPredicate.cpp HexagonHardwareLoops.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp new file mode 100644 index 000000000000..021e58a1d08a --- /dev/null +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -0,0 +1,1174 @@ +//===--- HexagonBitTracker.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, + MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, + MachineFunction &mf) + : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) { + // Populate the VRX map (VR to extension-type). + // Go over all the formal parameters of the function. If a given parameter + // P is sign- or zero-extended, locate the virtual register holding that + // parameter and create an entry in the VRX map indicating the type of ex- + // tension (and the source type). + // This is a bit complicated to do accurately, since the memory layout in- + // formation is necessary to precisely determine whether an aggregate para- + // meter will be passed in a register or in memory. What is given in MRI + // is the association between the physical register that is live-in (i.e. + // holds an argument), and the virtual register that this value will be + // copied into. This, by itself, is not sufficient to map back the virtual + // register to a formal parameter from Function (since consecutive live-ins + // from MRI may not correspond to consecutive formal parameters from Func- + // tion). To avoid the complications with in-memory arguments, only consi- + // der the initial sequence of formal parameters that are known to be + // passed via registers. + unsigned AttrIdx = 0; + unsigned InVirtReg, InPhysReg = 0; + const Function &F = *MF.getFunction(); + typedef Function::const_arg_iterator arg_iterator; + for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + AttrIdx++; + const Argument &Arg = *I; + Type *ATy = Arg.getType(); + unsigned Width = 0; + if (ATy->isIntegerTy()) + Width = ATy->getIntegerBitWidth(); + else if (ATy->isPointerTy()) + Width = 32; + // If pointer size is not set through target data, it will default to + // Module::AnyPointerSize. + if (Width == 0 || Width > 64) + break; + InPhysReg = getNextPhysReg(InPhysReg, Width); + if (!InPhysReg) + break; + InVirtReg = getVirtRegFor(InPhysReg); + if (!InVirtReg) + continue; + AttributeSet Attrs = F.getAttributes(); + if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); + else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width))); + } +} + + +BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + if (Sub == 0) + return MachineEvaluator::mask(Reg, 0); + using namespace Hexagon; + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned ID = RC->getID(); + uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); + switch (ID) { + case DoubleRegsRegClassID: + return (Sub == subreg_loreg) ? 
BT::BitMask(0, RW-1) + : BT::BitMask(RW, 2*RW-1); + default: + break; + } +#ifndef NDEBUG + dbgs() << PrintReg(Reg, &TRI, Sub) << '\n'; +#endif + llvm_unreachable("Unexpected register/subregister"); +} + + +namespace { + struct RegisterRefs : public std::vector<BT::RegisterRef> { + typedef std::vector<BT::RegisterRef> Base; + RegisterRefs(const MachineInstr *MI); + const BT::RegisterRef &operator[](unsigned n) const { + // The main purpose of this operator is to assert with bad argument. + assert(n < size()); + return Base::operator[](n); + } + }; + + RegisterRefs::RegisterRefs(const MachineInstr *MI) + : Base(MI->getNumOperands()) { + for (unsigned i = 0, n = size(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) + at(i) = BT::RegisterRef(MO); + // For indices that don't correspond to registers, the entry will + // remain constructed via the default constructor. + } + } +} + + +bool HexagonEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned NumDefs = 0; + + // Sanity verification: there should not be any defs with subregisters. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + NumDefs++; + assert(MO.getSubReg() == 0); + } + + if (NumDefs == 0) + return false; + + if (MI->mayLoad()) + return evaluateLoad(MI, Inputs, Outputs); + + // Check COPY instructions that copy formal parameters into virtual + // registers. Such parameters can be sign- or zero-extended at the + // call site, and we should take advantage of this knowledge. The MRI + // keeps a list of pairs of live-in physical and virtual registers, + // which provides information about which virtual registers will hold + // the argument values. The function will still contain instructions + // defining those virtual registers, and in practice those are COPY + // instructions from a physical to a virtual register. In such cases, + // applying the argument extension to the virtual register can be seen + // as simply mirroring the extension that had already been applied to + // the physical register at the call site. If the defining instruction + // was not a COPY, it would not be clear how to mirror that extension + // on the callee's side. For that reason, only check COPY instructions + // for potential extensions. + if (MI->isCopy()) { + if (evaluateFormalCopy(MI, Inputs, Outputs)) + return true; + } + + // Beyond this point, if any operand is a global, skip that instruction. + // The reason is that certain instructions that can take an immediate + // operand can also have a global symbol in that operand. To avoid + // checking what kind of operand a given instruction has individually + // for each instruction, do it here. Global symbols as operands gene- + // rally do not provide any useful information. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || + MO.isCPI()) + return false; + } + + RegisterRefs Reg(MI); + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + #define op(i) MI->getOperand(i) + #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs)) + #define im(i) MI->getOperand(i).getImm() + + // If the instruction has no register operands, skip it. + if (Reg.size() == 0) + return false; + + // Record result for register in operand 0. 
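+ // (rr0, together with cop/lo/hi/half/shuffle defined next, is a local
+ // helper lambda used by the per-opcode cases further below.)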
+ auto rr0 = [this,Reg] (const BT::RegisterCell &Val, CellMapType &Outputs)
+ -> bool {
+ putCell(Reg[0], Val, Outputs);
+ return true;
+ };
+ // Get the cell corresponding to the N-th operand.
+ auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+ -> BT::RegisterCell {
+ const MachineOperand &Op = MI->getOperand(N);
+ if (Op.isImm())
+ return eIMM(Op.getImm(), W);
+ if (!Op.isReg())
+ return RegisterCell::self(0, W);
+ assert(getRegBitWidth(Reg[N]) == W && "Register width mismatch");
+ return rc(N);
+ };
+ // Extract RW low bits of the cell.
+ auto lo = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ assert(RW <= RC.width());
+ return eXTR(RC, 0, RW);
+ };
+ // Extract RW high bits of the cell.
+ auto hi = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ uint16_t W = RC.width();
+ assert(RW <= W);
+ return eXTR(RC, W-RW, W);
+ };
+ // Extract N-th halfword (counting from the least significant position).
+ auto half = [this] (const BT::RegisterCell &RC, unsigned N)
+ -> BT::RegisterCell {
+ assert(N*16+16 <= RC.width());
+ return eXTR(RC, N*16, N*16+16);
+ };
+ // Shuffle bits (pick even/odd from cells and merge into result).
+ auto shuffle = [this] (const BT::RegisterCell &Rs, const BT::RegisterCell &Rt,
+ uint16_t BW, bool Odd) -> BT::RegisterCell {
+ uint16_t I = Odd, Ws = Rs.width();
+ assert(Ws == Rt.width());
+ RegisterCell RC = eXTR(Rt, I*BW, I*BW+BW).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ while (I*BW < Ws) {
+ RC.cat(eXTR(Rt, I*BW, I*BW+BW)).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ }
+ return RC;
+ };
+
+ // The bitwidth of the 0th operand. In most (if not all) of the
+ // instructions below, the 0th operand is the defined register.
+ // Pre-compute the bitwidth here, because it is needed in many cases
+ // below.
+ uint16_t W0 = (Reg[0].Reg != 0) ?
getRegBitWidth(Reg[0]) : 0; + + switch (Opc) { + // Transfer immediate: + + case A2_tfrsi: + case A2_tfrpi: + case CONST32: + case CONST32_Float_Real: + case CONST32_Int_Real: + case CONST64_Float_Real: + case CONST64_Int_Real: + return rr0(eIMM(im(1), W0), Outputs); + case TFR_PdFalse: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs); + case TFR_PdTrue: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs); + case TFR_FI: { + int FI = op(1).getIndex(); + int Off = op(2).getImm(); + unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); + unsigned L = Log2_32(A); + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + RC.fill(0, L, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + + // Transfer register: + + case A2_tfr: + case A2_tfrp: + case C2_pxfer_map: + return rr0(rc(1), Outputs); + case C2_tfrpr: { + uint16_t RW = W0; + uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(PW <= RW); + RegisterCell PC = eXTR(rc(1), 0, PW); + RegisterCell RC = RegisterCell(RW).insert(PC, BT::BitMask(0, PW-1)); + RC.fill(PW, RW, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + case C2_tfrrp: { + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + W0 = 8; // XXX Pred size + return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs); + } + + // Arithmetic: + + case A2_abs: + case A2_absp: + // TODO + break; + + case A2_addsp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell CW = RegisterCell(W0).insert(rc(1), BT::BitMask(0, W1-1)); + RegisterCell RC = eADD(eSXT(CW, W1), rc(2)); + return rr0(RC, Outputs); + } + case A2_add: + case A2_addp: + return rr0(eADD(rc(1), rc(2)), Outputs); + case A2_addi: + return rr0(eADD(rc(1), eIMM(im(2), W0)), Outputs); + case S4_addi_asl_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addi_lsr_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addaddi: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_mpyri_addi: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addi: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr_u2: { + RegisterCell M = eMLS(eIMM(im(2), W0), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addr: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case S4_subaddi: { + RegisterCell RC = eADD(rc(1), eSUB(eIMM(im(2), W0), rc(3))); + return rr0(RC, Outputs); + } + case M2_accii: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_acci: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case M2_subacc: { + RegisterCell RC = eADD(rc(1), eSUB(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case S2_addasl_rrri: { + RegisterCell RC = eADD(rc(1), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case C4_addipc: { + RegisterCell RPC = RegisterCell::self(Reg[0].Reg, W0); + RPC.fill(0, 2, BT::BitValue::Zero); + return rr0(eADD(RPC, 
eIMM(im(2), W0)), Outputs); + } + case A2_sub: + case A2_subp: + return rr0(eSUB(rc(1), rc(2)), Outputs); + case A2_subri: + return rr0(eSUB(eIMM(im(1), W0), rc(2)), Outputs); + case S4_subi_asl_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_subi_lsr_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M2_naccii: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_nacci: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + // 32-bit negation is done by "Rd = A2_subri 0, Rs" + case A2_negp: + return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs); + + case M2_mpy_up: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyss_s0: + return rr0(eMLS(rc(1), rc(2)), Outputs); + case M2_dpmpyss_acc_s0: + return rr0(eADD(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_dpmpyss_nac_s0: + return rr0(eSUB(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_mpyi: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(lo(M, W0), Outputs); + } + case M2_macsip: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_macsin: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eSUB(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_maci: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_mpysmi: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysin: { + RegisterCell M = eMLS(rc(1), eIMM(-im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysip: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpyu_up: { + RegisterCell M = eMLU(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyuu_s0: + return rr0(eMLU(rc(1), rc(2)), Outputs); + case M2_dpmpyuu_acc_s0: + return rr0(eADD(rc(1), eMLU(rc(2), rc(3))), Outputs); + case M2_dpmpyuu_nac_s0: + return rr0(eSUB(rc(1), eMLU(rc(2), rc(3))), Outputs); + //case M2_mpysu_up: + + // Logical/bitwise: + + case A2_andir: + return rr0(eAND(rc(1), eIMM(im(2), W0)), Outputs); + case A2_and: + case A2_andp: + return rr0(eAND(rc(1), rc(2)), Outputs); + case A4_andn: + case A4_andnp: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case S4_andi_asl_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_andi_lsr_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case M4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_and_xor: + return rr0(eAND(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_orir: + return rr0(eORL(rc(1), eIMM(im(2), W0)), Outputs); + case A2_or: + case A2_orp: + return rr0(eORL(rc(1), rc(2)), Outputs); + case A4_orn: + case A4_ornp: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case S4_ori_asl_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_ori_lsr_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_or_and: + return 
rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case S4_or_andi: + case S4_or_andix: { + RegisterCell RC = eORL(rc(1), eAND(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case S4_or_ori: { + RegisterCell RC = eORL(rc(1), eORL(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_or_xor: + return rr0(eORL(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_xor: + case A2_xorp: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case M4_xor_and: + return rr0(eXOR(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_xor_andn: + return rr0(eXOR(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case M4_xor_or: + return rr0(eXOR(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_xor_xacc: + return rr0(eXOR(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_not: + case A2_notp: + return rr0(eNOT(rc(1)), Outputs); + + case S2_asl_i_r: + case S2_asl_i_p: + return rr0(eASL(rc(1), im(2)), Outputs); + case A2_aslh: + return rr0(eASL(rc(1), 16), Outputs); + case S2_asl_i_r_acc: + case S2_asl_i_p_acc: + return rr0(eADD(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_nac: + case S2_asl_i_p_nac: + return rr0(eSUB(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_and: + case S2_asl_i_p_and: + return rr0(eAND(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_or: + case S2_asl_i_p_or: + return rr0(eORL(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_xacc: + case S2_asl_i_p_xacc: + return rr0(eXOR(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_vh: + case S2_asl_i_vw: + // TODO + break; + + case S2_asr_i_r: + case S2_asr_i_p: + return rr0(eASR(rc(1), im(2)), Outputs); + case A2_asrh: + return rr0(eASR(rc(1), 16), Outputs); + case S2_asr_i_r_acc: + case S2_asr_i_p_acc: + return rr0(eADD(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_nac: + case S2_asr_i_p_nac: + return rr0(eSUB(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_and: + case S2_asr_i_p_and: + return rr0(eAND(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_or: + case S2_asr_i_p_or: + return rr0(eORL(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_rnd: { + // The input is first sign-extended to 64 bits, then the output + // is truncated back to 32 bits. 
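+ // The rounding itself is ((sext(Rs) >> #u5) + 1) >> 1, computed in the
+ // 64-bit cell below before the low 32 bits are extracted.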
+ assert(W0 == 32); + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eASR(eADD(eASR(XC, im(2)), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_i_r_rnd_goodsyntax: { + int64_t S = im(2); + if (S == 0) + return rr0(rc(1), Outputs); + // Result: S2_asr_i_r_rnd Rs, u5-1 + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eLSR(eADD(eASR(XC, S-1), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_r_vh: + case S2_asr_i_vw: + case S2_asr_i_svw_trun: + // TODO + break; + + case S2_lsr_i_r: + case S2_lsr_i_p: + return rr0(eLSR(rc(1), im(2)), Outputs); + case S2_lsr_i_r_acc: + case S2_lsr_i_p_acc: + return rr0(eADD(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_nac: + case S2_lsr_i_p_nac: + return rr0(eSUB(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_and: + case S2_lsr_i_p_and: + return rr0(eAND(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_or: + case S2_lsr_i_p_or: + return rr0(eORL(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_xacc: + case S2_lsr_i_p_xacc: + return rr0(eXOR(rc(1), eLSR(rc(2), im(3))), Outputs); + + case S2_clrbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::Zero; + return rr0(RC, Outputs); + } + case S2_setbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::One; + return rr0(RC, Outputs); + } + case S2_togglebit_i: { + RegisterCell RC = rc(1); + uint16_t BX = im(2); + RC[BX] = RC[BX].is(0) ? BT::BitValue::One + : RC[BX].is(1) ? BT::BitValue::Zero + : BT::BitValue::self(); + return rr0(RC, Outputs); + } + + case A4_bitspliti: { + uint16_t W1 = getRegBitWidth(Reg[1]); + uint16_t BX = im(2); + // Res.uw[1] = Rs[bx+1:], Res.uw[0] = Rs[0:bx] + const BT::BitValue Zero = BT::BitValue::Zero; + RegisterCell RZ = RegisterCell(W0).fill(BX, W1, Zero) + .fill(W1+(W1-BX), W0, Zero); + RegisterCell BF1 = eXTR(rc(1), 0, BX), BF2 = eXTR(rc(1), BX, W1); + RegisterCell RC = eINS(eINS(RZ, BF1, 0), BF2, W1); + return rr0(RC, Outputs); + } + case S4_extract: + case S4_extractp: + case S2_extractu: + case S2_extractup: { + uint16_t Wd = im(2), Of = im(3); + assert(Wd <= W0); + if (Wd == 0) + return rr0(eIMM(0, W0), Outputs); + // If the width extends beyond the register size, pad the register + // with 0 bits. + RegisterCell Pad = (Wd+Of > W0) ? rc(1).cat(eIMM(0, Wd+Of-W0)) : rc(1); + RegisterCell Ext = eXTR(Pad, Of, Wd+Of); + // Ext is short, need to extend it with 0s or sign bit. + RegisterCell RC = RegisterCell(W0).insert(Ext, BT::BitMask(0, Wd-1)); + if (Opc == S2_extractu || Opc == S2_extractup) + return rr0(eZXT(RC, Wd), Outputs); + return rr0(eSXT(RC, Wd), Outputs); + } + case S2_insert: + case S2_insertp: { + uint16_t Wd = im(3), Of = im(4); + assert(Wd < W0 && Of < W0); + // If Wd+Of exceeds W0, the inserted bits are truncated. 
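+ // (For example, inserting an 8-bit field at offset 28 of a 32-bit
+ // register only keeps the low 4 bits of that field.)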
+ if (Wd+Of > W0) + Wd = W0-Of; + if (Wd == 0) + return rr0(rc(1), Outputs); + return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs); + } + + // Bit permutations: + + case A2_combineii: + case A4_combineii: + case A4_combineir: + case A4_combineri: + case A2_combinew: + assert(W0 % 2 == 0); + return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs); + case A2_combine_ll: + case A2_combine_lh: + case A2_combine_hl: + case A2_combine_hh: { + assert(W0 == 32); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + // Low half in the output is 0 for _ll and _hl, 1 otherwise: + unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl); + // High half in the output is 0 for _ll and _lh, 1 otherwise: + unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, LoH).cat(half(R1, HiH)); + return rr0(RC, Outputs); + } + case S2_packhl: { + assert(W0 == 64); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, 0).cat(half(R1, 0)).cat(half(R2, 1)) + .cat(half(R1, 1)); + return rr0(RC, Outputs); + } + case S2_shuffeb: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, false); + return rr0(RC, Outputs); + } + case S2_shuffeh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, false); + return rr0(RC, Outputs); + } + case S2_shuffob: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, true); + return rr0(RC, Outputs); + } + case S2_shuffoh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, true); + return rr0(RC, Outputs); + } + case C2_mask: { + uint16_t WR = W0; + uint16_t WP = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(WR == 64 && WP == 8); + RegisterCell R1 = rc(1); + RegisterCell RC(WR); + for (uint16_t i = 0; i < WP; ++i) { + const BT::BitValue &V = R1[i]; + BT::BitValue F = (V.is(0) || V.is(1)) ? V : BT::BitValue::self(); + RC.fill(i*8, i*8+8, F); + } + return rr0(RC, Outputs); + } + + // Mux: + + case C2_muxii: + case C2_muxir: + case C2_muxri: + case C2_mux: { + BT::BitValue PC0 = rc(1)[0]; + RegisterCell R2 = cop(2, W0); + RegisterCell R3 = cop(3, W0); + if (PC0.is(0) || PC0.is(1)) + return rr0(RegisterCell::ref(PC0 ? R2 : R3), Outputs); + R2.meet(R3, Reg[0].Reg); + return rr0(R2, Outputs); + } + case C2_vmux: + // TODO + break; + + // Sign- and zero-extension: + + case A2_sxtb: + return rr0(eSXT(rc(1), 8), Outputs); + case A2_sxth: + return rr0(eSXT(rc(1), 16), Outputs); + case A2_sxtw: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell RC = eSXT(rc(1).cat(eIMM(0, W1)), W1); + return rr0(RC, Outputs); + } + case A2_zxtb: + return rr0(eZXT(rc(1), 8), Outputs); + case A2_zxth: + return rr0(eZXT(rc(1), 16), Outputs); + + // Bit count: + + case S2_cl0: + case S2_cl0p: + // Always produce a 32-bit result. 
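+      // eCLB counts the number of leading bits equal to the given value.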
+ return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + case S2_cl1: + case S2_cl1p: + return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + case S2_clb: + case S2_clbp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + RegisterCell R1 = rc(1); + BT::BitValue TV = R1[W1-1]; + if (TV.is(0) || TV.is(1)) + return rr0(eCLB(R1, TV, 32), Outputs); + break; + } + case S2_ct0: + case S2_ct0p: + return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + case S2_ct1: + case S2_ct1p: + return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + case S5_popcountp: + // TODO + break; + + case C2_all8: { + RegisterCell P1 = rc(1); + bool Has0 = false, All1 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(1)) + All1 = false; + if (!P1[i].is(0)) + continue; + Has0 = true; + break; + } + if (!Has0 && !All1) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (All1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_any8: { + RegisterCell P1 = rc(1); + bool Has1 = false, All0 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(0)) + All0 = false; + if (!P1[i].is(1)) + continue; + Has1 = true; + break; + } + if (!Has1 && !All0) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (Has1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_and: + return rr0(eAND(rc(1), rc(2)), Outputs); + case C2_andn: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case C2_not: + return rr0(eNOT(rc(1)), Outputs); + case C2_or: + return rr0(eORL(rc(1), rc(2)), Outputs); + case C2_orn: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case C2_xor: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case C4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_and_orn: + return rr0(eAND(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_and: + return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_or_orn: + return rr0(eORL(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C2_bitsclr: + case C2_bitsclri: + case C2_bitsset: + case C4_nbitsclr: + case C4_nbitsclri: + case C4_nbitsset: + // TODO + break; + case S2_tstbit_i: + case S4_ntstbit_i: { + BT::BitValue V = rc(1)[im(2)]; + if (V.is(0) || V.is(1)) { + // If instruction is S2_tstbit_i, test for 1, otherwise test for 0. + bool TV = (Opc == S2_tstbit_i); + BT::BitValue F = V.is(TV) ? BT::BitValue::One : BT::BitValue::Zero; + return rr0(RegisterCell(W0).fill(0, W0, F), Outputs); + } + break; + } + + default: + return MachineEvaluator::evaluate(MI, Inputs, Outputs); + } + #undef im + #undef rc + #undef op + return false; +} + + +bool HexagonEvaluator::evaluate(const MachineInstr *BI, + const CellMapType &Inputs, BranchTargetList &Targets, + bool &FallsThru) const { + // We need to evaluate one branch at a time. TII::AnalyzeBranch checks + // all the branches in a basic block at once, so we cannot use it. + unsigned Opc = BI->getOpcode(); + bool SimpleBranch = false; + bool Negated = false; + switch (Opc) { + case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumpfnewpt: + Negated = true; + case Hexagon::J2_jumpt: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumptnewpt: + // Simple branch: if([!]Pn) jump ... + // i.e. Op0 = predicate, Op1 = branch target. 
+ SimpleBranch = true; + break; + case Hexagon::J2_jump: + Targets.insert(BI->getOperand(0).getMBB()); + FallsThru = false; + return true; + default: + // If the branch is of unknown type, assume that all successors are + // executable. + return false; + } + + if (!SimpleBranch) + return false; + + // BI is a conditional branch if we got here. + RegisterRef PR = BI->getOperand(0); + RegisterCell PC = getCell(PR, Inputs); + const BT::BitValue &Test = PC[0]; + + // If the condition is neither true nor false, then it's unknown. + if (!Test.is(0) && !Test.is(1)) + return false; + + // "Test.is(!Negated)" means "branch condition is true". + if (!Test.is(!Negated)) { + // Condition known to be false. + FallsThru = true; + return true; + } + + Targets.insert(BI->getOperand(1).getMBB()); + FallsThru = false; + return true; +} + + +bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + if (TII.isPredicated(MI)) + return false; + assert(MI->mayLoad() && "A load that mayn't?"); + unsigned Opc = MI->getOpcode(); + + uint16_t BitNum; + bool SignEx; + using namespace Hexagon; + + switch (Opc) { + default: + return false; + +#if 0 + // memb_fifo + case L2_loadalignb_pbr: + case L2_loadalignb_pcr: + case L2_loadalignb_pi: + // memh_fifo + case L2_loadalignh_pbr: + case L2_loadalignh_pcr: + case L2_loadalignh_pi: + // membh + case L2_loadbsw2_pbr: + case L2_loadbsw2_pci: + case L2_loadbsw2_pcr: + case L2_loadbsw2_pi: + case L2_loadbsw4_pbr: + case L2_loadbsw4_pci: + case L2_loadbsw4_pcr: + case L2_loadbsw4_pi: + // memubh + case L2_loadbzw2_pbr: + case L2_loadbzw2_pci: + case L2_loadbzw2_pcr: + case L2_loadbzw2_pi: + case L2_loadbzw4_pbr: + case L2_loadbzw4_pci: + case L2_loadbzw4_pcr: + case L2_loadbzw4_pi: +#endif + + case L2_loadrbgp: + case L2_loadrb_io: + case L2_loadrb_pbr: + case L2_loadrb_pci: + case L2_loadrb_pcr: + case L2_loadrb_pi: + case L4_loadrb_abs: + case L4_loadrb_ap: + case L4_loadrb_rr: + case L4_loadrb_ur: + BitNum = 8; + SignEx = true; + break; + + case L2_loadrubgp: + case L2_loadrub_io: + case L2_loadrub_pbr: + case L2_loadrub_pci: + case L2_loadrub_pcr: + case L2_loadrub_pi: + case L4_loadrub_abs: + case L4_loadrub_ap: + case L4_loadrub_rr: + case L4_loadrub_ur: + BitNum = 8; + SignEx = false; + break; + + case L2_loadrhgp: + case L2_loadrh_io: + case L2_loadrh_pbr: + case L2_loadrh_pci: + case L2_loadrh_pcr: + case L2_loadrh_pi: + case L4_loadrh_abs: + case L4_loadrh_ap: + case L4_loadrh_rr: + case L4_loadrh_ur: + BitNum = 16; + SignEx = true; + break; + + case L2_loadruhgp: + case L2_loadruh_io: + case L2_loadruh_pbr: + case L2_loadruh_pci: + case L2_loadruh_pcr: + case L2_loadruh_pi: + case L4_loadruh_rr: + case L4_loadruh_abs: + case L4_loadruh_ap: + case L4_loadruh_ur: + BitNum = 16; + SignEx = false; + break; + + case L2_loadrigp: + case L2_loadri_io: + case L2_loadri_pbr: + case L2_loadri_pci: + case L2_loadri_pcr: + case L2_loadri_pi: + case L2_loadw_locked: + case L4_loadri_abs: + case L4_loadri_ap: + case L4_loadri_rr: + case L4_loadri_ur: + case LDriw_pred: + BitNum = 32; + SignEx = true; + break; + + case L2_loadrdgp: + case L2_loadrd_io: + case L2_loadrd_pbr: + case L2_loadrd_pci: + case L2_loadrd_pcr: + case L2_loadrd_pi: + case L4_loadd_locked: + case L4_loadrd_abs: + case L4_loadrd_ap: + case L4_loadrd_rr: + case L4_loadrd_ur: + BitNum = 64; + SignEx = true; + break; + } + + const MachineOperand &MD = MI->getOperand(0); + assert(MD.isReg() && MD.isDef()); + RegisterRef RD = MD; + + uint16_t W = 
getRegBitWidth(RD); + assert(W >= BitNum && BitNum > 0); + RegisterCell Res(W); + + for (uint16_t i = 0; i < BitNum; ++i) + Res[i] = BT::BitValue::self(BT::BitRef(RD.Reg, i)); + + if (SignEx) { + const BT::BitValue &Sign = Res[BitNum-1]; + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::ref(Sign); + } else { + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::Zero; + } + + putCell(RD, Res, Outputs); + return true; +} + + +bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + // If MI defines a formal parameter, but is not a copy (loads are handled + // in evaluateLoad), then it's not clear what to do. + assert(MI->isCopy()); + + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) + return false; + RegExtMap::const_iterator F = VRX.find(RD.Reg); + if (F == VRX.end()) + return false; + + uint16_t EW = F->second.Width; + // Store RD's cell into the map. This will associate the cell with a virtual + // register, and make zero-/sign-extends possible (otherwise we would be ex- + // tending "self" bit values, which will have no effect, since "self" values + // cannot be references to anything). + putCell(RD, getCell(RS, Inputs), Outputs); + + RegisterCell Res; + // Read RD's cell from the outputs instead of RS's cell from the inputs: + if (F->second.Type == ExtType::SExt) + Res = eSXT(getCell(RD, Outputs), EW); + else if (F->second.Type == ExtType::ZExt) + Res = eZXT(getCell(RD, Outputs), EW); + + putCell(RD, Res, Outputs); + return true; +} + + +unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { + using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); + assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); + + static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 }; + static const unsigned Phys64[] = { D0, D1, D2 }; + const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned); + const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned); + + // Return the first parameter register of the required width. + if (PReg == 0) + return (Width <= 32) ? Phys32[0] : Phys64[0]; + + // Set Idx32, Idx64 in such a way that Idx+1 would give the index of the + // next register. + unsigned Idx32 = 0, Idx64 = 0; + if (!Is64) { + while (Idx32 < Num32) { + if (Phys32[Idx32] == PReg) + break; + Idx32++; + } + Idx64 = Idx32/2; + } else { + while (Idx64 < Num64) { + if (Phys64[Idx64] == PReg) + break; + Idx64++; + } + Idx32 = Idx64*2+1; + } + + if (Width <= 32) + return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0; + return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; +} + + +unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { + typedef MachineRegisterInfo::livein_iterator iterator; + for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { + if (I->first == PReg) + return I->second; + } + return 0; +} diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h new file mode 100644 index 000000000000..897af2d71870 --- /dev/null +++ b/lib/Target/Hexagon/HexagonBitTracker.h @@ -0,0 +1,64 @@ +//===--- HexagonBitTracker.h ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONBITTRACKER_H +#define HEXAGONBITTRACKER_H + +#include "BitTracker.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class HexagonInstrInfo; + class HexagonRegisterInfo; + +struct HexagonEvaluator : public BitTracker::MachineEvaluator { + typedef BitTracker::CellMapType CellMapType; + typedef BitTracker::RegisterRef RegisterRef; + typedef BitTracker::RegisterCell RegisterCell; + typedef BitTracker::BranchTargetList BranchTargetList; + + HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, MachineFunction &mf); + + bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const override; + bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const override; + + BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override; + + MachineFunction &MF; + MachineFrameInfo &MFI; + const HexagonInstrInfo &TII; + +private: + bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + + unsigned getNextPhysReg(unsigned PReg, unsigned Width) const; + unsigned getVirtRegFor(unsigned PReg) const; + + // Type of formal parameter extension. + struct ExtType { + enum { SExt, ZExt }; + char Type; + uint16_t Width; + ExtType() : Type(0), Width(0) {} + ExtType(char t, uint16_t w) : Type(t), Width(w) {} + }; + // Map VR -> extension type. + typedef DenseMap<unsigned, ExtType> RegExtMap; + RegExtMap VRX; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp new file mode 100644 index 000000000000..9f5fac156527 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -0,0 +1,1325 @@ +//===--- HexagonCommonGEP.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "commgep" + +#include "llvm/Pass.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + +#include <map> +#include <set> +#include <vector> + +#include "HexagonTargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden, + cl::ZeroOrMore); + +static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +namespace llvm { + void initializeHexagonCommonGEPPass(PassRegistry&); +} + +namespace { + struct GepNode; + typedef std::set<GepNode*> NodeSet; + typedef std::map<GepNode*,Value*> NodeToValueMap; + typedef std::vector<GepNode*> NodeVect; + typedef std::map<GepNode*,NodeVect> NodeChildrenMap; + typedef std::set<Use*> UseSet; + typedef std::map<GepNode*,UseSet> NodeToUsesMap; + + // Numbering map for gep nodes. Used to keep track of ordering for + // gep nodes. + struct NodeNumbering : public std::map<const GepNode*,unsigned> { + }; + + struct NodeOrdering : public NodeNumbering { + NodeOrdering() : LastNum(0) {} +#ifdef _MSC_VER + void special_insert_for_special_msvc(const GepNode *N) +#else + using NodeNumbering::insert; + void insert(const GepNode* N) +#endif + { + insert(std::make_pair(N, ++LastNum)); + } + bool operator() (const GepNode* N1, const GepNode *N2) const { + const_iterator F1 = find(N1), F2 = find(N2); + assert(F1 != end() && F2 != end()); + return F1->second < F2->second; + } + private: + unsigned LastNum; + }; + + + class HexagonCommonGEP : public FunctionPass { + public: + static char ID; + HexagonCommonGEP() : FunctionPass(ID) { + initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "Hexagon Common GEP"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.addPreserved<PostDominatorTree>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + private: + typedef std::map<Value*,GepNode*> ValueToNodeMap; + typedef std::vector<Value*> ValueVect; + typedef std::map<GepNode*,ValueVect> NodeToValuesMap; + + void getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order); + bool isHandledGepForm(GetElementPtrInst *GepI); + void processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM); + void collect(); + void common(); + + BasicBlock *recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + BasicBlock *recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + bool isInvariantIn(Value *Val, Loop *L); + bool isInvariantIn(GepNode *Node, Loop *L); + bool 
isInMainPath(BasicBlock *B, Loop *L); + BasicBlock *adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + void separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc); + void separateConstantChains(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + void computeNodePlacement(NodeToValueMap &Loc); + + Value *fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB); + void getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM); + void materialize(NodeToValueMap &Loc); + + void removeDeadCode(); + + NodeVect Nodes; + NodeToUsesMap Uses; + NodeOrdering NodeOrder; // Node ordering, for deterministic behavior. + SpecificBumpPtrAllocator<GepNode> *Mem; + LLVMContext *Ctx; + LoopInfo *LI; + DominatorTree *DT; + PostDominatorTree *PDT; + Function *Fn; + }; +} + + +char HexagonCommonGEP::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) + +namespace { + struct GepNode { + enum { + None = 0, + Root = 0x01, + Internal = 0x02, + Used = 0x04 + }; + + uint32_t Flags; + union { + GepNode *Parent; + Value *BaseVal; + }; + Value *Idx; + Type *PTy; // Type of the pointer operand. + + GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {} + GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) { + if (Flags & Root) + BaseVal = N->BaseVal; + else + Parent = N->Parent; + } + friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN); + }; + + + Type *next_type(Type *Ty, Value *Idx) { + // Advance the type. + if (!Ty->isStructTy()) { + Type *NexTy = cast<SequentialType>(Ty)->getElementType(); + return NexTy; + } + // Otherwise it is a struct type. 
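+    // A struct must be indexed with a constant; use it to select the
+    // member type.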
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx); + assert(CI && "Struct type with non-constant index"); + int64_t i = CI->getValue().getSExtValue(); + Type *NextTy = cast<StructType>(Ty)->getElementType(i); + return NextTy; + } + + + raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) { + OS << "{ {"; + bool Comma = false; + if (GN.Flags & GepNode::Root) { + OS << "root"; + Comma = true; + } + if (GN.Flags & GepNode::Internal) { + if (Comma) + OS << ','; + OS << "internal"; + Comma = true; + } + if (GN.Flags & GepNode::Used) { + if (Comma) + OS << ','; + OS << "used"; + Comma = true; + } + OS << "} "; + if (GN.Flags & GepNode::Root) + OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; + else + OS << "Parent:" << GN.Parent; + + OS << " Idx:"; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GN.Idx)) + OS << CI->getValue().getSExtValue(); + else if (GN.Idx->hasName()) + OS << GN.Idx->getName(); + else + OS << "<anon> =" << *GN.Idx; + + OS << " PTy:"; + if (GN.PTy->isStructTy()) { + StructType *STy = cast<StructType>(GN.PTy); + if (!STy->isLiteral()) + OS << GN.PTy->getStructName(); + else + OS << "<anon-struct>:" << *STy; + } + else + OS << *GN.PTy; + OS << " }"; + return OS; + } + + + template <typename NodeContainer> + void dump_node_container(raw_ostream &OS, const NodeContainer &S) { + typedef typename NodeContainer::const_iterator const_iterator; + for (const_iterator I = S.begin(), E = S.end(); I != E; ++I) + OS << *I << ' ' << **I << '\n'; + } + + raw_ostream &operator<< (raw_ostream &OS, + const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { + dump_node_container(OS, S); + return OS; + } + + + raw_ostream &operator<< (raw_ostream &OS, + const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ + typedef NodeToUsesMap::const_iterator const_iterator; + for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const UseSet &Us = I->second; + OS << I->first << " -> #" << Us.size() << '{'; + for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + User *R = (*J)->getUser(); + if (R->hasName()) + OS << ' ' << R->getName(); + else + OS << " <?>(" << *R << ')'; + } + OS << " }\n"; + } + return OS; + } + + + struct in_set { + in_set(const NodeSet &S) : NS(S) {} + bool operator() (GepNode *N) const { + return NS.find(N) != NS.end(); + } + private: + const NodeSet &NS; + }; +} + + +inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) { + return A.Allocate(); +} + + +void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root, + ValueVect &Order) { + // Compute block ordering for a typical DT-based traversal of the flow + // graph: "before visiting a block, all of its dominators must have been + // visited". + + Order.push_back(Root); + DomTreeNode *DTN = DT->getNode(Root); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + getBlockTraversalOrder((*I)->getBlock(), Order); +} + + +bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { + // No vector GEPs. + if (!GepI->getType()->isPointerTy()) + return false; + // No GEPs without any indices. (Is this possible?) 
+ if (GepI->idx_begin() == GepI->idx_end()) + return false; + return true; +} + + +void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, + ValueToNodeMap &NM) { + DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); + GepNode *N = new (*Mem) GepNode; + Value *PtrOp = GepI->getPointerOperand(); + ValueToNodeMap::iterator F = NM.find(PtrOp); + if (F == NM.end()) { + N->BaseVal = PtrOp; + N->Flags |= GepNode::Root; + } else { + // If PtrOp was a GEP instruction, it must have already been processed. + // The ValueToNodeMap entry for it is the last gep node in the generated + // chain. Link to it here. + N->Parent = F->second; + } + N->PTy = PtrOp->getType(); + N->Idx = *GepI->idx_begin(); + + // Collect the list of users of this GEP instruction. Will add it to the + // last node created for it. + UseSet Us; + for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end(); + UI != UE; ++UI) { + // Check if this gep is used by anything other than other geps that + // we will process. + if (isa<GetElementPtrInst>(*UI)) { + GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI); + if (isHandledGepForm(UserG)) + continue; + } + Us.insert(&UI.getUse()); + } + Nodes.push_back(N); +#ifdef _MSC_VER + NodeOrder.special_insert_for_special_msvc(N); +#else + NodeOrder.insert(N); +#endif + + // Skip the first index operand, since we only handle 0. This dereferences + // the pointer operand. + GepNode *PN = N; + Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType(); + for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end(); + OI != OE; ++OI) { + Value *Op = *OI; + GepNode *Nx = new (*Mem) GepNode; + Nx->Parent = PN; // Link Nx to the previous node. + Nx->Flags |= GepNode::Internal; + Nx->PTy = PtrTy; + Nx->Idx = Op; + Nodes.push_back(Nx); +#ifdef _MSC_VER + NodeOrder.special_insert_for_special_msvc(Nx); +#else + NodeOrder.insert(Nx); +#endif + PN = Nx; + + PtrTy = next_type(PtrTy, Op); + } + + // After last node has been created, update the use information. + if (!Us.empty()) { + PN->Flags |= GepNode::Used; + Uses[PN].insert(Us.begin(), Us.end()); + } + + // Link the last node with the originating GEP instruction. This is to + // help with linking chained GEP instructions. + NM.insert(std::make_pair(GepI, PN)); +} + + +void HexagonCommonGEP::collect() { + // Establish depth-first traversal order of the dominator tree. + ValueVect BO; + getBlockTraversalOrder(Fn->begin(), BO); + + // The creation of gep nodes requires DT-traversal. When processing a GEP + // instruction that uses another GEP instruction as the base pointer, the + // gep node for the base pointer should already exist. 
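+  // NM maps each GEP instruction processed so far to the last node of the
+  // chain created for it.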
+ ValueToNodeMap NM; + for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) { + BasicBlock *B = cast<BasicBlock>(*I); + for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) { + if (!isa<GetElementPtrInst>(J)) + continue; + GetElementPtrInst *GepI = cast<GetElementPtrInst>(J); + if (isHandledGepForm(GepI)) + processGepInst(GepI, NM); + } + } + + DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); +} + + +namespace { + void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, + NodeVect &Roots) { + typedef NodeVect::const_iterator const_iterator; + for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) { + Roots.push_back(N); + continue; + } + GepNode *PN = N->Parent; + NCM[PN].push_back(N); + } + } + + void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) { + NodeVect Work; + Work.push_back(Root); + Nodes.insert(Root); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + Work.insert(Work.end(), CF->second.begin(), CF->second.end()); + Nodes.insert(CF->second.begin(), CF->second.end()); + } + } + } +} + + +namespace { + typedef std::set<NodeSet> NodeSymRel; + typedef std::pair<GepNode*,GepNode*> NodePair; + typedef std::set<NodePair> NodePairSet; + + const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { + for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I) + if (I->count(N)) + return &*I; + return 0; + } + + // Create an ordered pair of GepNode pointers. The pair will be used in + // determining equality. The only purpose of the ordering is to eliminate + // duplication due to the commutativity of equality/non-equality. + NodePair node_pair(GepNode *N1, GepNode *N2) { + uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2); + if (P1 <= P2) + return std::make_pair(N1, N2); + return std::make_pair(N2, N1); + } + + unsigned node_hash(GepNode *N) { + // Include everything except flags and parent. + FoldingSetNodeID ID; + ID.AddPointer(N->Idx); + ID.AddPointer(N->PTy); + return ID.ComputeHash(); + } + + bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) { + // Don't cache the result for nodes with different hashes. The hash + // comparison is fast enough. + if (node_hash(N1) != node_hash(N2)) + return false; + + NodePair NP = node_pair(N1, N2); + NodePairSet::iterator FEq = Eq.find(NP); + if (FEq != Eq.end()) + return true; + NodePairSet::iterator FNe = Ne.find(NP); + if (FNe != Ne.end()) + return false; + // Not previously compared. + bool Root1 = N1->Flags & GepNode::Root; + bool Root2 = N2->Flags & GepNode::Root; + NodePair P = node_pair(N1, N2); + // If the Root flag has different values, the nodes are different. + // If both nodes are root nodes, but their base pointers differ, + // they are different. + if (Root1 != Root2 || (Root1 && N1->BaseVal != N2->BaseVal)) { + Ne.insert(P); + return false; + } + // Here the root flags are identical, and for root nodes the + // base pointers are equal, so the root nodes are equal. + // For non-root nodes, compare their parent nodes. + if (Root1 || node_eq(N1->Parent, N2->Parent, Eq, Ne)) { + Eq.insert(P); + return true; + } + return false; + } +} + + +void HexagonCommonGEP::common() { + // The essence of this commoning is finding gep nodes that are equal. + // To do this we need to compare all pairs of nodes. 
To save time, + // first, partition the set of all nodes into sets of potentially equal + // nodes, and then compare pairs from within each partition. + typedef std::map<unsigned,NodeSet> NodeSetMap; + NodeSetMap MaybeEq; + + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + unsigned H = node_hash(N); + MaybeEq[H].insert(N); + } + + // Compute the equivalence relation for the gep nodes. Use two caches, + // one for equality and the other for non-equality. + NodeSymRel EqRel; // Equality relation (as set of equivalence classes). + NodePairSet Eq, Ne; // Caches. + for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end(); + I != E; ++I) { + NodeSet &S = I->second; + for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) { + GepNode *N = *NI; + // If node already has a class, then the class must have been created + // in a prior iteration of this loop. Since equality is transitive, + // nothing more will be added to that class, so skip it. + if (node_class(N, EqRel)) + continue; + + // Create a new class candidate now. + NodeSet C; + for (NodeSet::iterator NJ = std::next(NI); NJ != NE; ++NJ) + if (node_eq(N, *NJ, Eq, Ne)) + C.insert(*NJ); + // If Tmp is empty, N would be the only element in it. Don't bother + // creating a class for it then. + if (!C.empty()) { + C.insert(N); // Finalize the set before adding it to the relation. + std::pair<NodeSymRel::iterator, bool> Ins = EqRel.insert(C); + (void)Ins; + assert(Ins.second && "Cannot add a class"); + } + } + } + + DEBUG({ + dbgs() << "Gep node equality:\n"; + for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I) + dbgs() << "{ " << I->first << ", " << I->second << " }\n"; + + dbgs() << "Gep equivalence classes:\n"; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + dbgs() << '{'; + const NodeSet &S = *I; + for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) { + if (J != S.begin()) + dbgs() << ','; + dbgs() << ' ' << *J; + } + dbgs() << " }\n"; + } + }); + + + // Create a projection from a NodeSet to the minimal element in it. + typedef std::map<const NodeSet*,GepNode*> ProjMap; + ProjMap PM; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + const NodeSet &S = *I; + GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder); + std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min)); + (void)Ins; + assert(Ins.second && "Cannot add minimal element"); + + // Update the min element's flags, and user list. + uint32_t Flags = 0; + UseSet &MinUs = Uses[Min]; + for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) { + GepNode *N = *J; + uint32_t NF = N->Flags; + // If N is used, append all original values of N to the list of + // original values of Min. + if (NF & GepNode::Used) + MinUs.insert(Uses[N].begin(), Uses[N].end()); + Flags |= NF; + } + if (MinUs.empty()) + Uses.erase(Min); + + // The collected flags should include all the flags from the min element. + assert((Min->Flags & Flags) == Min->Flags); + Min->Flags = Flags; + } + + // Commoning: for each non-root gep node, replace "Parent" with the + // selected (minimum) node from the corresponding equivalence class. + // If a given parent does not have an equivalence class, leave it + // unchanged (it means that it's the only element in its class). 
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) + continue; + const NodeSet *PC = node_class(N->Parent, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + // Found a replacement, use it. + GepNode *Rep = F->second; + N->Parent = Rep; + } + + DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes); + + // Finally, erase the nodes that are no longer used. + NodeSet Erase; + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + const NodeSet *PC = node_class(N, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + if (N == F->second) + continue; + // Node for removal. + Erase.insert(*I); + } + NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(), + in_set(Erase)); + Nodes.resize(std::distance(Nodes.begin(), NewE)); + + DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); +} + + +namespace { + template <typename T> + BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { + DEBUG({ + dbgs() << "NCD of {"; + for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + dbgs() << ' ' << B->getName(); + } + dbgs() << " }\n"; + }); + + // Allow null basic blocks in Blocks. In such cases, return 0. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + if (I == E || !*I) + return 0; + BasicBlock *Dom = cast<BasicBlock>(*I); + while (++I != E) { + BasicBlock *B = cast_or_null<BasicBlock>(*I); + Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0; + if (!Dom) + return 0; + } + DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); + return Dom; + } + + template <typename T> + BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) { + // If two blocks, A and B, dominate a block C, then A dominates B, + // or B dominates A. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + // Find the first non-null block. + while (I != E && !*I) + ++I; + if (I == E) + return DT->getRoot(); + BasicBlock *DomB = cast<BasicBlock>(*I); + while (++I != E) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + if (DT->dominates(B, DomB)) + continue; + if (!DT->dominates(DomB, B)) + return 0; + DomB = B; + } + return DomB; + } + + // Find the first use in B of any value from Values. If no such use, + // return B->end(). + template <typename T> + BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) { + BasicBlock::iterator FirstUse = B->end(), BEnd = B->end(); + typedef typename T::iterator iterator; + for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) { + Value *V = *I; + // If V is used in a PHI node, the use belongs to the incoming block, + // not the block with the PHI node. In the incoming block, the use + // would be considered as being at the end of it, so it cannot + // influence the position of the first use (which is assumed to be + // at the end to start with). 
+ if (isa<PHINode>(V)) + continue; + if (!isa<Instruction>(V)) + continue; + Instruction *In = cast<Instruction>(V); + if (In->getParent() != B) + continue; + BasicBlock::iterator It = In; + if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd)) + FirstUse = It; + } + return FirstUse; + } + + bool is_empty(const BasicBlock *B) { + return B->empty() || (&*B->begin() == B->getTerminator()); + } +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "Loc for node:" << Node << '\n'); + // Recalculate the placement for Node, assuming that the locations of + // its children in Loc are valid. + // Return 0 if there is no valid placement for Node (for example, it + // uses an index value that is not available at the location required + // to dominate all children, etc.). + + // Find the nearest common dominator for: + // - all users, if the node is used, and + // - all children. + ValueVect Bs; + if (Node->Flags & GepNode::Used) { + // Append all blocks with uses of the original values to the + // block vector Bs. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end() && "Used node with no use information"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + User *R = U->getUser(); + if (!isa<Instruction>(R)) + continue; + BasicBlock *PB = isa<PHINode>(R) + ? cast<PHINode>(R)->getIncomingBlock(*U) + : cast<Instruction>(R)->getParent(); + Bs.push_back(PB); + } + } + // Append the location of each child. + NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + NodeToValueMap::iterator LF = Loc.find(CN); + // If the child is only used in GEP instructions (i.e. is not used in + // non-GEP instructions), the nearest dominator computed for it may + // have been null. In such case it won't have a location available. + if (LF == Loc.end()) + continue; + Bs.push_back(LF->second); + } + } + + BasicBlock *DomB = nearest_common_dominator(DT, Bs); + if (!DomB) + return 0; + // Check if the index used by Node dominates the computed dominator. + Instruction *IdxI = dyn_cast<Instruction>(Node->Idx); + if (IdxI && !DT->dominates(IdxI->getParent(), DomB)) + return 0; + + // Avoid putting nodes into empty blocks. + while (is_empty(DomB)) { + DomTreeNode *N = (*DT)[DomB]->getIDom(); + if (!N) + break; + DomB = N->getBlock(); + } + + // Otherwise, DomB is fine. Update the location map. + Loc[Node] = DomB; + return DomB; +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); + // Recalculate the placement of Node, after recursively recalculating the + // placements of all its children. 
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + } + BasicBlock *LB = recalculatePlacement(Node, NCM, Loc); + DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); + return LB; +} + + +bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) { + if (isa<Constant>(Val) || isa<Argument>(Val)) + return true; + Instruction *In = dyn_cast<Instruction>(Val); + if (!In) + return false; + BasicBlock *HdrB = L->getHeader(), *DefB = In->getParent(); + return DT->properlyDominates(DefB, HdrB); +} + + +bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) { + if (Node->Flags & GepNode::Root) + if (!isInvariantIn(Node->BaseVal, L)) + return false; + return isInvariantIn(Node->Idx, L); +} + + +bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) { + BasicBlock *HB = L->getHeader(); + BasicBlock *LB = L->getLoopLatch(); + // B must post-dominate the loop header or dominate the loop latch. + if (PDT->dominates(B, HB)) + return true; + if (LB && DT->dominates(B, LB)) + return true; + return false; +} + + +namespace { + BasicBlock *preheader(DominatorTree *DT, Loop *L) { + if (BasicBlock *PH = L->getLoopPreheader()) + return PH; + if (!OptSpeculate) + return 0; + DomTreeNode *DN = DT->getNode(L->getHeader()); + if (!DN) + return 0; + return DN->getIDom()->getBlock(); + } +} + + +BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // Find the "topmost" location for Node: it must be dominated by both, + // its parent (or the BaseVal, if it's a root node), and by the index + // value. + ValueVect Bs; + if (Node->Flags & GepNode::Root) { + if (Instruction *PIn = dyn_cast<Instruction>(Node->BaseVal)) + Bs.push_back(PIn->getParent()); + } else { + Bs.push_back(Loc[Node->Parent]); + } + if (Instruction *IIn = dyn_cast<Instruction>(Node->Idx)) + Bs.push_back(IIn->getParent()); + BasicBlock *TopB = nearest_common_dominatee(DT, Bs); + + // Traverse the loop nest upwards until we find a loop in which Node + // is no longer invariant, or until we get to the upper limit of Node's + // placement. The traversal will also stop when a suitable "preheader" + // cannot be found for a given loop. The "preheader" may actually be + // a regular block outside of the loop (i.e. not guarded), in which case + // the Node will be speculated. + // For nodes that are not in the main path of the containing loop (i.e. + // are not executed in each iteration), do not move them out of the loop. + BasicBlock *LocB = cast_or_null<BasicBlock>(Loc[Node]); + if (LocB) { + Loop *Lp = LI->getLoopFor(LocB); + while (Lp) { + if (!isInvariantIn(Node, Lp) || !isInMainPath(LocB, Lp)) + break; + BasicBlock *NewLoc = preheader(DT, Lp); + if (!NewLoc || !DT->dominates(TopB, NewLoc)) + break; + Lp = Lp->getParentLoop(); + LocB = NewLoc; + } + } + Loc[Node] = LocB; + + // Recursively compute the locations of all children nodes. 
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + } + return LocB; +} + + +namespace { + struct LocationAsBlock { + LocationAsBlock(const NodeToValueMap &L) : Map(L) {} + const NodeToValueMap ⤅ + }; + + raw_ostream &operator<< (raw_ostream &OS, + const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; + raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) { + for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end(); + I != E; ++I) { + OS << I->first << " -> "; + BasicBlock *B = cast<BasicBlock>(I->second); + OS << B->getName() << '(' << B << ')'; + OS << '\n'; + } + return OS; + } + + inline bool is_constant(GepNode *N) { + return isa<ConstantInt>(N->Idx); + } +} + + +void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, + NodeToValueMap &Loc) { + User *R = U->getUser(); + DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " + << *R << '\n'); + BasicBlock *PB = cast<Instruction>(R)->getParent(); + + GepNode *N = Node; + GepNode *C = 0, *NewNode = 0; + while (is_constant(N) && !(N->Flags & GepNode::Root)) { + // XXX if (single-use) dont-replicate; + GepNode *NewN = new (*Mem) GepNode(N); + Nodes.push_back(NewN); + Loc[NewN] = PB; + + if (N == Node) + NewNode = NewN; + NewN->Flags &= ~GepNode::Used; + if (C) + C->Parent = NewN; + C = NewN; + N = N->Parent; + } + if (!NewNode) + return; + + // Move over all uses that share the same user as U from Node to NewNode. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + UseSet NewUs; + for (UseSet::iterator I = Us.begin(); I != Us.end(); ) { + User *S = (*I)->getUser(); + UseSet::iterator Nx = std::next(I); + if (S == R) { + NewUs.insert(*I); + Us.erase(I); + } + I = Nx; + } + if (Us.empty()) { + Node->Flags &= ~GepNode::Used; + Uses.erase(UF); + } + + // Should at least have U in NewUs. + NewNode->Flags |= GepNode::Used; + DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n'); + assert(!NewUs.empty()); + Uses[NewNode] = NewUs; +} + + +void HexagonCommonGEP::separateConstantChains(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // First approximation: extract all chains. + NodeSet Ns; + nodes_for_root(Node, NCM, Ns); + + DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n'); + // Collect all used nodes together with the uses from loads and stores, + // where the GEP node could be folded into the load/store instruction. + NodeToUsesMap FNs; // Foldable nodes. + for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) { + GepNode *N = *I; + if (!(N->Flags & GepNode::Used)) + continue; + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + // Loads/stores that use the node N. + UseSet LSs; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + Use *U = *J; + User *R = U->getUser(); + // We're interested in uses that provide the address. It can happen + // that the value may also be provided via GEP, but we won't handle + // those cases here for now. 
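+      // Keep only the uses in which the node provides the pointer operand
+      // of a load or a store.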
+ if (LoadInst *Ld = dyn_cast<LoadInst>(R)) { + unsigned PtrX = LoadInst::getPointerOperandIndex(); + if (&Ld->getOperandUse(PtrX) == U) + LSs.insert(U); + } else if (StoreInst *St = dyn_cast<StoreInst>(R)) { + unsigned PtrX = StoreInst::getPointerOperandIndex(); + if (&St->getOperandUse(PtrX) == U) + LSs.insert(U); + } + } + // Even if the total use count is 1, separating the chain may still be + // beneficial, since the constant chain may be longer than the GEP alone + // would be (e.g. if the parent node has a constant index and also has + // other children). + if (!LSs.empty()) + FNs.insert(std::make_pair(N, LSs)); + } + + DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); + + for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) { + GepNode *N = I->first; + UseSet &Us = I->second; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) + separateChainForNode(N, *J, Loc); + } +} + + +void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { + // Compute the inverse of the Node.Parent links. Also, collect the set + // of root nodes. + NodeChildrenMap NCM; + NodeVect Roots; + invert_find_roots(Nodes, NCM, Roots); + + // Compute the initial placement determined by the users' locations, and + // the locations of the child nodes. + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + + DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); + + if (OptEnableInv) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + + DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" + << LocationAsBlock(Loc)); + } + if (OptEnableConst) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + separateConstantChains(*I, NCM, Loc); + } + DEBUG(dbgs() << "Node use information:\n" << Uses); + + // At the moment, there is no further refinement of the initial placement. + // Such a refinement could include splitting the nodes if they are placed + // too far from some of its users. + + DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); +} + + +Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB) { + DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() + << " for nodes:\n" << NA); + unsigned Num = NA.size(); + GepNode *RN = NA[0]; + assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); + + Value *NewInst = 0; + Value *Input = RN->BaseVal; + Value **IdxList = new Value*[Num+1]; + unsigned nax = 0; + do { + unsigned IdxC = 0; + // If the type of the input of the first node is not a pointer, + // we need to add an artificial i32 0 to the indices (because the + // actual input in the IR will be a pointer). + if (!NA[nax]->PTy->isPointerTy()) { + Type *Int32Ty = Type::getInt32Ty(*Ctx); + IdxList[IdxC++] = ConstantInt::get(Int32Ty, 0); + } + + // Keep adding indices from NA until we have to stop and generate + // an "intermediate" GEP. + while (++nax <= Num) { + GepNode *N = NA[nax-1]; + IdxList[IdxC++] = N->Idx; + if (nax < Num) { + // We have to stop, if the expected type of the output of this node + // is not the same as the input type of the next node. 
+ Type *NextTy = next_type(N->PTy, N->Idx); + if (NextTy != NA[nax]->PTy) + break; + } + } + ArrayRef<Value*> A(IdxList, IdxC); + Type *InpTy = Input->getType(); + Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); + NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At); + DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); + Input = NewInst; + } while (nax <= Num); + + delete[] IdxList; + return NewInst; +} + + +void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM) { + NodeVect Work; + Work.push_back(Node); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + if (N->Flags & GepNode::Used) { + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end() && "No use information for used node"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) + Values.push_back((*I)->getUser()); + } + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + Work.insert(Work.end(), Cs.begin(), Cs.end()); + } + } +} + + +void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { + DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); + NodeChildrenMap NCM; + NodeVect Roots; + // Compute the inversion again, since computing placement could alter + // "parent" relation between nodes. + invert_find_roots(Nodes, NCM, Roots); + + while (!Roots.empty()) { + NodeVect::iterator First = Roots.begin(); + GepNode *Root = *First, *Last = *First; + Roots.erase(First); + + NodeVect NA; // Nodes to assemble. + // Append to NA all child nodes up to (and including) the first child + // that: + // (1) has more than 1 child, or + // (2) is used, or + // (3) has a child located in a different block. + bool LastUsed = false; + unsigned LastCN = 0; + // The location may be null if the computation failed (it can legitimately + // happen for nodes created from dead GEPs). + Value *LocV = Loc[Last]; + if (!LocV) + continue; + BasicBlock *LastB = cast<BasicBlock>(LocV); + do { + NA.push_back(Last); + LastUsed = (Last->Flags & GepNode::Used); + if (LastUsed) + break; + NodeChildrenMap::iterator CF = NCM.find(Last); + LastCN = (CF != NCM.end()) ? CF->second.size() : 0; + if (LastCN != 1) + break; + GepNode *Child = CF->second.front(); + BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]); + if (ChildB != 0 && LastB != ChildB) + break; + Last = Child; + } while (true); + + BasicBlock::iterator InsertAt = LastB->getTerminator(); + if (LastUsed || LastCN > 0) { + ValueVect Urs; + getAllUsersForNode(Root, Urs, NCM); + BasicBlock::iterator FirstUse = first_use_of_in_block(Urs, LastB); + if (FirstUse != LastB->end()) + InsertAt = FirstUse; + } + + // Generate a new instruction for NA. + Value *NewInst = fabricateGEP(NA, InsertAt, LastB); + + // Convert all the children of Last node into roots, and append them + // to the Roots list. + if (LastCN > 0) { + NodeVect &Cs = NCM[Last]; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + CN->Flags &= ~GepNode::Internal; + CN->Flags |= GepNode::Root; + CN->BaseVal = NewInst; + Roots.push_back(CN); + } + } + + // Lastly, if the Last node was used, replace all uses with the new GEP. + // The uses reference the original GEP values. 
+ if (LastUsed) { + NodeToUsesMap::iterator UF = Uses.find(Last); + assert(UF != Uses.end() && "No use information found"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + U->set(NewInst); + } + } + } +} + + +void HexagonCommonGEP::removeDeadCode() { + ValueVect BO; + BO.push_back(&Fn->front()); + + for (unsigned i = 0; i < BO.size(); ++i) { + BasicBlock *B = cast<BasicBlock>(BO[i]); + DomTreeNode *N = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + BO.push_back((*I)->getBlock()); + } + + for (unsigned i = BO.size(); i > 0; --i) { + BasicBlock *B = cast<BasicBlock>(BO[i-1]); + BasicBlock::InstListType &IL = B->getInstList(); + typedef BasicBlock::InstListType::reverse_iterator reverse_iterator; + ValueVect Ins; + for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I) + Ins.push_back(&*I); + for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) { + Instruction *In = cast<Instruction>(*I); + if (isInstructionTriviallyDead(In)) + In->eraseFromParent(); + } + } +} + + +bool HexagonCommonGEP::runOnFunction(Function &F) { + // For now bail out on C++ exception handling. + for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A) + for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I) + if (isa<InvokeInst>(I) || isa<LandingPadInst>(I)) + return false; + + Fn = &F; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + PDT = &getAnalysis<PostDominatorTree>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + Ctx = &F.getContext(); + + Nodes.clear(); + Uses.clear(); + NodeOrder.clear(); + + SpecificBumpPtrAllocator<GepNode> Allocator; + Mem = &Allocator; + + collect(); + common(); + + NodeToValueMap Loc; + computeNodePlacement(Loc); + materialize(Loc); + removeDeadCode(); + +#ifdef XDEBUG + // Run this only when expensive checks are enabled. + verifyFunction(F); +#endif + return true; +} + + +namespace llvm { + FunctionPass *createHexagonCommonGEP() { + return new HexagonCommonGEP(); + } +} diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 37ed173a79cd..ce10aeadef94 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -1,3 +1,12 @@ +//===--- HexagonExpandCondsets.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Replace mux instructions with the corresponding legal instructions. // It is meant to work post-SSA, but still on virtual registers. It was // originally placed between register coalescing and machine instruction diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 868f87e18413..29283c81877e 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -864,13 +864,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF, // Check for an unused caller-saved register. 
for ( ; *CallerSavedRegs; ++CallerSavedRegs) { MCPhysReg FreeReg = *CallerSavedRegs; - if (MRI.isPhysRegUsed(FreeReg)) + if (!MRI.reg_nodbg_empty(FreeReg)) continue; // Check aliased register usage. bool IsCurrentRegUsed = false; for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) - if (MRI.isPhysRegUsed(*AI)) { + if (!MRI.reg_nodbg_empty(*AI)) { IsCurrentRegUsed = true; break; } @@ -959,8 +959,11 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) } -void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger* RS) const { +void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); auto &HRI = *HST.getRegisterInfo(); @@ -969,11 +972,9 @@ void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan( // If we have a function containing __builtin_eh_return we want to spill and // restore all callee saved registers. Pretend that they are used. if (HasEHReturn) { - MachineRegisterInfo &MRI = MF.getRegInfo(); for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; ++CSRegs) - if (!MRI.isPhysRegUsed(*CSRegs)) - MRI.setPhysRegUsed(*CSRegs); + SavedRegs.set(*CSRegs); } const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 89500cb85724..d39ee2c77195 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -45,7 +45,7 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; bool targetHandlesStackFrameRounding() const override { diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp new file mode 100644 index 000000000000..4d32208bd5aa --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -0,0 +1,259 @@ +//===--- HexagonGenExtract.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U), + cl::Hidden, cl::desc("Cutoff for generating \"extract\"" + " instructions")); + +// This prevents generating extract instructions that have the offset of 0. 
+// One of the reasons for "extract" is to put a sequence of bits in a regis- +// ter, starting at offset 0 (so that these bits can then be used by an +// "insert"). If the bits are already at offset 0, it is better not to gene- +// rate "extract", since logical bit operations can be merged into compound +// instructions (as opposed to "extract"). +static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden, + cl::desc("No extract instruction with offset 0")); + +static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, + cl::desc("Require & in extract patterns")); + +namespace llvm { + void initializeHexagonGenExtractPass(PassRegistry&); + FunctionPass *createHexagonGenExtract(); +} + + +namespace { + class HexagonGenExtract : public FunctionPass { + public: + static char ID; + HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { + initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const override { + return "Hexagon generate \"extract\" instructions"; + } + virtual bool runOnFunction(Function &F) override; + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + private: + bool visitBlock(BasicBlock *B); + bool convert(Instruction *In); + + unsigned ExtractCount; + DominatorTree *DT; + }; + + char HexagonGenExtract::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) + + +bool HexagonGenExtract::convert(Instruction *In) { + using namespace PatternMatch; + Value *BF = 0; + ConstantInt *CSL = 0, *CSR = 0, *CM = 0; + BasicBlock *BB = In->getParent(); + LLVMContext &Ctx = BB->getContext(); + bool LogicalSR; + + // (and (shl (lshr x, #sr), #sl), #m) + LogicalSR = true; + bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + + if (!Match) { + // (and (shl (ashr x, #sr), #sl), #m) + LogicalSR = false; + Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + } + if (!Match) { + // (and (shl x, #sl), #m) + LogicalSR = true; + CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)), + m_ConstantInt(CM))); + if (Match && NoSR0) + return false; + } + if (!Match) { + // (and (lshr x, #sr), #m) + LogicalSR = true; + CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CM))); + } + if (!Match) { + // (and (ashr x, #sr), #m) + LogicalSR = false; + CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CM))); + } + if (!Match) { + CM = 0; + // (shl (lshr x, #sr), #sl) + LogicalSR = true; + Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL))); + } + if (!Match) { + CM = 0; + // (shl (ashr x, #sr), #sl) + LogicalSR = false; + Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL))); + } + if (!Match) + return false; + + Type *Ty = BF->getType(); + if (!Ty->isIntegerTy()) + 
return false; + unsigned BW = Ty->getPrimitiveSizeInBits(); + if (BW != 32 && BW != 64) + return false; + + uint32_t SR = CSR->getZExtValue(); + uint32_t SL = CSL->getZExtValue(); + + if (!CM) { + // If there was no and, and the shift left did not remove all potential + // sign bits created by the shift right, then extractu cannot reproduce + // this value. + if (!LogicalSR && (SR > SL)) + return false; + APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL); + CM = ConstantInt::get(Ctx, A); + } + + // CM is the shifted-left mask. Shift it back right to remove the zero + // bits on least-significant positions. + APInt M = CM->getValue().lshr(SL); + uint32_t T = M.countTrailingOnes(); + + // During the shifts some of the bits will be lost. Calculate how many + // of the original value will remain after shift right and then left. + uint32_t U = BW - std::max(SL, SR); + // The width of the extracted field is the minimum of the original bits + // that remain after the shifts and the number of contiguous 1s in the mask. + uint32_t W = std::min(U, T); + if (W == 0) + return false; + + // Check if the extracted bits are contained within the mask that it is + // and-ed with. The extract operation will copy these bits, and so the + // mask cannot any holes in it that would clear any of the bits of the + // extracted field. + if (!LogicalSR) { + // If the shift right was arithmetic, it could have included some 1 bits. + // It is still ok to generate extract, but only if the mask eliminates + // those bits (i.e. M does not have any bits set beyond U). + APInt C = APInt::getHighBitsSet(BW, BW-U); + if (M.intersects(C) || !APIntOps::isMask(W, M)) + return false; + } else { + // Check if M starts with a contiguous sequence of W times 1 bits. Get + // the low U bits of M (which eliminates the 0 bits shifted in on the + // left), and check if the result is APInt's "mask": + if (!APIntOps::isMask(W, M.getLoBits(U))) + return false; + } + + IRBuilder<> IRB(BB, In); + Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu + : Intrinsic::hexagon_S2_extractup; + Module *Mod = BB->getParent()->getParent(); + Value *ExtF = Intrinsic::getDeclaration(Mod, IntId); + Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); + if (SL != 0) + NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); + In->replaceAllUsesWith(NewIn); + return true; +} + + +bool HexagonGenExtract::visitBlock(BasicBlock *B) { + // Depth-first, bottom-up traversal. + DomTreeNode *DTN = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + visitBlock((*I)->getBlock()); + + // Allow limiting the number of generated extracts for debugging purposes. + bool HasCutoff = ExtractCutoff.getPosition(); + unsigned Cutoff = ExtractCutoff; + + bool Changed = false; + BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); + while (true) { + if (HasCutoff && (ExtractCount >= Cutoff)) + return Changed; + bool Last = (I == Begin); + if (!Last) + NextI = std::prev(I); + Instruction *In = &*I; + bool Done = convert(In); + if (HasCutoff && Done) + ExtractCount++; + Changed |= Done; + if (Last) + break; + I = NextI; + } + return Changed; +} + + +bool HexagonGenExtract::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + bool Changed; + + // Traverse the function bottom-up, to see super-expressions before their + // sub-expressions. 
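+  // (visitBlock recurses into the dominated blocks first, and then scans
+  // each block from its last instruction backwards, so the outermost
+  // operation of an expression tree is examined before the operations that
+  // feed it.)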
+ BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F); + Changed = visitBlock(Entry); + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenExtract() { + return new HexagonGenExtract(); +} diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp new file mode 100644 index 000000000000..096da949e77b --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -0,0 +1,1598 @@ +//===--- HexagonGenInsert.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexinsert" + +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +#include <map> +#include <vector> + +using namespace llvm; + +static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); +// The distance cutoff is selected based on the precheckin-perf results: +// cutoffs 20, 25, 35, and 40 are worse than 30. +static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " + "generation.")); + +static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); +static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " + "generation")); + +static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +// Whether to construct constant values via "insert". Could eliminate constant +// extenders, but often not practical. +static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, + cl::ZeroOrMore); + +namespace { + // The preprocessor gets confused when the DEBUG macro is passed larger + // chunks of code. Use this function to detect debugging. + inline bool isDebug() { +#ifndef NDEBUG + return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); +#else + return false; +#endif + } +} + + +namespace { + // Set of virtual registers, based on BitVector. 
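+  // Bit positions correspond to virtual register indices
+  // (TargetRegisterInfo::virtReg2Index); find_first/find_next return the
+  // virtual register itself, or 0 once the set is exhausted (0 is a safe
+  // sentinel, since it is never a valid virtual register number).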
+ struct RegisterSet : private BitVector { + RegisterSet() : BitVector() {} + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} + + using BitVector::clear; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + // A convenience class to associate unsigned numbers (such as virtual + // registers) with unsigned numbers. + struct UnsignedMap : public DenseMap<unsigned,unsigned> { + UnsignedMap() : BaseType() {} + private: + typedef DenseMap<unsigned,unsigned> BaseType; + }; + + // A utility to establish an ordering between virtual registers: + // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] + // This is meant as a cache for the ordering of virtual registers defined + // by a potentially expensive comparison function, or obtained by a proce- + // dure that should not be repeated each time two registers are compared. + struct RegisterOrdering : public UnsignedMap { + RegisterOrdering() : UnsignedMap() {} + unsigned operator[](unsigned VR) const { + const_iterator F = find(VR); + assert(F != end()); + return F->second; + } + // Add operator(), so that objects of this class can be used as + // comparators in std::sort et al. 
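+    // For example, a std::vector<unsigned> of virtual registers VRs can be
+    // sorted by this ordering with std::sort(VRs.begin(), VRs.end(), Ord),
+    // where Ord is the RegisterOrdering object.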
+ bool operator() (unsigned VR1, unsigned VR2) const { + return operator[](VR1) < operator[](VR2); + } + }; +} + + +namespace { + // Ordering of bit values. This class does not have operator[], but + // is supplies a comparison operator() for use in std:: algorithms. + // The order is as follows: + // - 0 < 1 < ref + // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), + // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. + struct BitValueOrdering { + BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} + bool operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const; + const RegisterOrdering &BaseOrd; + }; +} + + +bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const { + if (V1 == V2) + return false; + // V1==0 => true, V2==0 => false + if (V1.is(0) || V2.is(0)) + return V1.is(0); + // Neither of V1,V2 is 0, and V1!=V2. + // V2==1 => false, V1==1 => true + if (V2.is(1) || V1.is(1)) + return !V2.is(1); + // Both V1,V2 are refs. + unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; + if (Ind1 != Ind2) + return Ind1 < Ind2; + // If V1.Pos==V2.Pos + assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); + return V1.RefI.Pos < V2.RefI.Pos; +} + + +namespace { + // Cache for the BitTracker's cell map. Map lookup has a logarithmic + // complexity, this class will memoize the lookup results to reduce + // the access time for repeated lookups of the same cell. + struct CellMapShadow { + CellMapShadow(const BitTracker &T) : BT(T) {} + const BitTracker::RegisterCell &lookup(unsigned VR) { + unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); + // Grow the vector to at least 32 elements. + if (RInd >= CVect.size()) + CVect.resize(std::max(RInd+16, 32U), 0); + const BitTracker::RegisterCell *CP = CVect[RInd]; + if (CP == 0) + CP = CVect[RInd] = &BT.lookup(VR); + return *CP; + } + + const BitTracker &BT; + + private: + typedef std::vector<const BitTracker::RegisterCell*> CellVectType; + CellVectType CVect; + }; +} + + +namespace { + // Comparator class for lexicographic ordering of virtual registers + // according to the corresponding BitTracker::RegisterCell objects. + struct RegisterCellLexCompare { + RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) + : BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; + + // Comparator class for lexicographic ordering of virtual registers + // according to the specified bits of the corresponding BitTracker:: + // RegisterCell objects. + // Specifically, this class will be used to compare bit B of a register + // cell for a selected virtual register R with bit N of any register + // other than R. + struct RegisterCellBitCompareSel { + RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, + const BitValueOrdering &BO, CellMapShadow &M) + : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const unsigned SelR, SelB; + const unsigned BitN; + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; +} + + +bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { + // Ordering of registers, made up from two given orderings: + // - the ordering of the register numbers, and + // - the ordering of register cells. + // Def. R1 < R2 if: + // - cell(R1) < cell(R2), or + // - cell(R1) == cell(R2), and index(R1) < index(R2). 
+ // + // For register cells, the ordering is lexicographic, with index 0 being + // the most significant. + if (VR1 == VR2) + return false; + + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { + const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; + if (V1 != V2) + return BitOrd(V1, V2); + } + // Cells are equal up until the common length. + if (W1 != W2) + return W1 < W2; + + return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; +} + + +bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { + if (VR1 == VR2) + return false; + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); + const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; + uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; + // If Bit1 exceeds the width of VR1, then: + // - return false, if at the same time Bit2 exceeds VR2, or + // - return true, otherwise. + // (I.e. "a bit value that does not exist is less than any bit value + // that does exist".) + if (W1 <= Bit1) + return Bit2 < W2; + // If Bit1 is within VR1, but Bit2 is not within VR2, return false. + if (W2 <= Bit2) + return false; + + const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; + if (V1 != V2) + return BitOrd(V1, V2); + return false; +} + + +namespace { + class OrderedRegisterList { + typedef std::vector<unsigned> ListType; + public: + OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + void insert(unsigned VR); + void remove(unsigned VR); + unsigned operator[](unsigned Idx) const { + assert(Idx < Seq.size()); + return Seq[Idx]; + } + unsigned size() const { + return Seq.size(); + } + + typedef ListType::iterator iterator; + typedef ListType::const_iterator const_iterator; + iterator begin() { return Seq.begin(); } + iterator end() { return Seq.end(); } + const_iterator begin() const { return Seq.begin(); } + const_iterator end() const { return Seq.end(); } + + // Convenience function to convert an iterator to the corresponding index. + unsigned idx(iterator It) const { return It-begin(); } + private: + ListType Seq; + const RegisterOrdering &Ord; + }; + + + struct PrintORL { + PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) + : RL(L), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); + private: + const OrderedRegisterList &RL; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { + OS << '('; + OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); + for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { + if (I != B) + OS << ", "; + OS << PrintReg(*I, P.TRI); + } + OS << ')'; + return OS; + } +} + + +void OrderedRegisterList::insert(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + if (L == Seq.end()) + Seq.push_back(VR); + else + Seq.insert(L, VR); +} + + +void OrderedRegisterList::remove(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + assert(L != Seq.end()); + Seq.erase(L); +} + + +namespace { + // A record of the insert form. The fields correspond to the operands + // of the "insert" instruction: + // ... 
= insert(SrcR, InsR, #Wdh, #Off) + struct IFRecord { + IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) + : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} + unsigned SrcR, InsR; + uint16_t Wdh, Off; + }; + + struct PrintIFR { + PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) + : IFR(R), TRI(RI) {} + private: + const IFRecord &IFR; + const TargetRegisterInfo *TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { + unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; + OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI) + << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; + return OS; + } + + typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; +} + + +namespace llvm { + void initializeHexagonGenInsertPass(PassRegistry&); + FunctionPass *createHexagonGenInsert(); +} + + +namespace { + class HexagonGenInsert : public MachineFunctionPass { + public: + static char ID; + HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate \"insert\" instructions"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; + + void buildOrderingMF(RegisterOrdering &RO) const; + void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; + bool isIntClass(const TargetRegisterClass *RC) const; + bool isConstant(unsigned VR) const; + bool isSmallConstant(unsigned VR) const; + bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, + uint16_t L, uint16_t S) const; + bool findSelfReference(unsigned VR) const; + bool findNonSelfReference(unsigned VR) const; + void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; + void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; + unsigned distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const; + unsigned distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const; + bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); + void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); + void findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const; + void computeRemovableRegisters(); + + void pruneEmptyLists(); + void pruneCoveredSets(unsigned VR); + void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); + void pruneRegCopies(unsigned VR); + void pruneCandidates(); + void selectCandidates(); + bool generateInserts(); + + bool removeDeadCode(MachineDomTreeNode *N); + + // IFRecord coupled with a set of potentially removable registers: + typedef std::vector<IFRecordWithRegSet> IFListType; + typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType + + void dump_map() const; + + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + CellMapShadow *CMS; + + RegisterOrdering BaseOrd; + RegisterOrdering CellOrd; + IFMapType IFMap; + }; + + char HexagonGenInsert::ID = 0; +} + 
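+// For reference, the operation named by an IFRecord,
+//   DstR = insert(SrcR, InsR, #Wdh, #Off),
+// takes the Wdh least significant bits of InsR and places them into the
+// result at bit position Off, with every other bit coming from SrcR. A rough
+// model on plain 64-bit values (an illustration, not code used by the pass):
+//
+//   uint64_t insertBits(uint64_t SrcR, uint64_t InsR, unsigned Wdh,
+//                       unsigned Off) {
+//     uint64_t Mask = (Wdh >= 64) ? ~0ULL : ((1ULL << Wdh) - 1);
+//     return (SrcR & ~(Mask << Off)) | ((InsR & Mask) << Off);
+//   }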
+ +void HexagonGenInsert::dump_map() const { + typedef IFMapType::const_iterator iterator; + for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + dbgs() << " " << PrintReg(I->first, HRI) << ":\n"; + const IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " + << PrintRegSet(LL[i].second, HRI) << '\n'; + } +} + + +void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { + unsigned Index = 0; + typedef MachineFunction::const_iterator mf_iterator; + for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { + const MachineBasicBlock &B = *A; + if (!CMS->BT.reached(&B)) + continue; + typedef MachineBasicBlock::const_iterator mb_iterator; + for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { + const MachineInstr *MI = &*I; + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned R = MO.getReg(); + assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); + if (TargetRegisterInfo::isVirtualRegister(R)) + RO.insert(std::make_pair(R, Index++)); + } + } + } + } + // Since some virtual registers may have had their def and uses eliminated, + // they are no longer referenced in the code, and so they will not appear + // in the map. +} + + +void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, + RegisterOrdering &RO) const { + // Create a vector of all virtual registers (collect them from the base + // ordering RB), and then sort it using the RegisterCell comparator. + BitValueOrdering BVO(RB); + RegisterCellLexCompare LexCmp(BVO, *CMS); + typedef std::vector<unsigned> SortableVectorType; + SortableVectorType VRs; + for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) + VRs.push_back(I->first); + std::sort(VRs.begin(), VRs.end(), LexCmp); + // Transfer the results to the outgoing register ordering. + for (unsigned i = 0, n = VRs.size(); i < n; ++i) + RO.insert(std::make_pair(VRs[i], i)); +} + + +inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { + return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; +} + + +bool HexagonGenInsert::isConstant(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + for (uint16_t i = 0; i < W; ++i) { + const BitTracker::BitValue &BV = RC[i]; + if (BV.is(0) || BV.is(1)) + continue; + return false; + } + return true; +} + + +bool HexagonGenInsert::isSmallConstant(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + if (W > 64) + return false; + uint64_t V = 0, B = 1; + for (uint16_t i = 0; i < W; ++i) { + const BitTracker::BitValue &BV = RC[i]; + if (BV.is(1)) + V |= B; + else if (!BV.is(0)) + return false; + B <<= 1; + } + + // For 32-bit registers, consider: Rd = #s16. + if (W == 32) + return isInt<16>(V); + + // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) + return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); +} + + +bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, + unsigned InsR, uint16_t L, uint16_t S) const { + const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); + const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); + const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); + // Only integet (32-/64-bit) register classes. 
+ if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) + return false; + // The "source" register must be of the same class as DstR. + if (DstRC != SrcRC) + return false; + if (DstRC == InsRC) + return true; + // A 64-bit register can only be generated from other 64-bit registers. + if (DstRC == &Hexagon::DoubleRegsRegClass) + return false; + // Otherwise, the L and S cannot span 32-bit word boundary. + if (S < 32 && S+L > 32) + return false; + return true; +} + + +bool HexagonGenInsert::findSelfReference(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) + return true; + } + return false; +} + + +bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { + BitTracker::RegisterCell RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) + return true; + } + return false; +} + + +void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, + RegisterSet &Defs) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + + +void HexagonGenInsert::getInstrUses(const MachineInstr *MI, + RegisterSet &Uses) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + + +unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const { + // Forward distance from the end of a block to the beginning of it does + // not make sense. This function should not be called with FromB == ToB. + assert(FromB != ToB); + + unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); + // If we have already computed it, return the cached result. + PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); + if (F != M.end()) + return F->second; + unsigned ToRPO = RPO.lookup(ToN); + + unsigned MaxD = 0; + typedef MachineBasicBlock::const_pred_iterator pred_iterator; + for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { + const MachineBasicBlock *PB = *I; + // Skip back edges. Also, if FromB is a predecessor of ToB, the distance + // along that path will be 0, and we don't need to do any calculations + // on it. + if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) + continue; + unsigned D = PB->size() + distance(FromB, PB, RPO, M); + if (D > MaxD) + MaxD = D; + } + + // Memoize the result for later lookup. 
+ M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); + return MaxD; +} + + +unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const { + const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); + if (FB == TB) + return std::distance(FromI, ToI); + unsigned D1 = std::distance(TB->begin(), ToI); + unsigned D2 = distance(FB, TB, RPO, M); + unsigned D3 = std::distance(FromI, FB->end()); + return D1+D2+D3; +} + + +bool HexagonGenInsert::findRecordInsertForms(unsigned VR, + OrderedRegisterList &AVs) { + if (isDebug()) { + dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) + << " AVs: " << PrintORL(AVs, HRI) << "\n"; + } + if (AVs.size() == 0) + return false; + + typedef OrderedRegisterList::iterator iterator; + BitValueOrdering BVO(BaseOrd); + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + + typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift) + typedef std::vector<RSRecord> RSListType; + // Have a map, with key being the matching prefix length, and the value + // being the list of pairs (R,S), where R's prefix matches VR at S. + // (DenseMap<uint16_t,RSListType> fails to instantiate.) + typedef DenseMap<unsigned,RSListType> LRSMapType; + LRSMapType LM; + + // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, + // and find matching prefixes from AVs with the rotated RC. Such a prefix + // would match a string of bits (of length L) in RC starting at S. + for (uint16_t S = 0; S < W; ++S) { + iterator B = AVs.begin(), E = AVs.end(); + // The registers in AVs are ordered according to the lexical order of + // the corresponding register cells. This means that the range of regis- + // ters in AVs that match a prefix of length L+1 will be contained in + // the range that matches a prefix of length L. This means that we can + // keep narrowing the search space as the prefix length goes up. This + // helps reduce the overall complexity of the search. + uint16_t L; + for (L = 0; L < W-S; ++L) { + // Compare against VR's bits starting at S, which emulates rotation + // of VR by S. + RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); + iterator NewB = std::lower_bound(B, E, VR, RCB); + iterator NewE = std::upper_bound(NewB, E, VR, RCB); + // For the registers that are eliminated from the next range, L is + // the longest prefix matching VR at position S (their prefixes + // differ from VR at S+L). If L>0, record this information for later + // use. + if (L > 0) { + for (iterator I = B; I != NewB; ++I) + LM[L].push_back(std::make_pair(*I, S)); + for (iterator I = NewE; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + } + B = NewB, E = NewE; + if (B == E) + break; + } + // Record the final register range. If this range is non-empty, then + // L=W-S. + assert(B == E || L == W-S); + if (B != E) { + for (iterator I = B; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + // If B!=E, then we found a range of registers whose prefixes cover the + // rest of VR from position S. There is no need to further advance S. 
+ break; + } + } + + if (isDebug()) { + dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n"; + for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { + dbgs() << " L=" << I->first << ':'; + const RSListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@" + << LL[i].second << ')'; + dbgs() << '\n'; + } + } + + + bool Recorded = false; + + for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { + unsigned SrcR = *I; + int FDi = -1, LDi = -1; // First/last different bit. + const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); + uint16_t AW = AC.width(); + for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { + if (RC[i] == AC[i]) + continue; + if (FDi == -1) + FDi = i; + LDi = i; + } + if (FDi == -1) + continue; // TODO (future): Record identical registers. + // Look for a register whose prefix could patch the range [FD..LD] + // where VR and SrcR differ. + uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. + uint16_t MinL = LD-FD+1; + for (uint16_t L = MinL; L < W; ++L) { + LRSMapType::iterator F = LM.find(L); + if (F == LM.end()) + continue; + RSListType &LL = F->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + uint16_t S = LL[i].second; + // MinL is the minimum length of the prefix. Any length above MinL + // allows some flexibility as to where the prefix can start: + // given the extra length EL=L-MinL, the prefix must start between + // max(0,FD-EL) and FD. + if (S > FD) // Starts too late. + continue; + uint16_t EL = L-MinL; + uint16_t LowS = (EL < FD) ? FD-EL : 0; + if (S < LowS) // Starts too early. + continue; + unsigned InsR = LL[i].first; + if (!isValidInsertForm(VR, SrcR, InsR, L, S)) + continue; + if (isDebug()) { + dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI) + << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#" + << S << ")\n"; + } + IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); + IFMap[VR].push_back(RR); + Recorded = true; + } + } + } + + return Recorded; +} + + +void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, + OrderedRegisterList &AVs) { + if (isDebug()) + dbgs() << "visiting block BB#" << B->getNumber() << "\n"; + + // First, check if this block is reachable at all. If not, the bit tracker + // will not have any information about registers in it. + if (!CMS->BT.reached(B)) + return; + + bool DoConst = OptConst; + // Keep a separate set of registers defined in this block, so that we + // can remove them from the list of available registers once all DT + // successors have been processed. + RegisterSet BlockDefs, InsDefs; + for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + MachineInstr *MI = &*I; + InsDefs.clear(); + getInstrDefs(MI, InsDefs); + // Leave those alone. They are more transparent than "insert". + bool Skip = MI->isCopy() || MI->isRegSequence(); + + if (!Skip) { + // Visit all defined registers, and attempt to find the corresponding + // "insert" representations. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { + // Do not collect registers that are known to be compile-time cons- + // tants, unless requested. + if (!DoConst && isConstant(VR)) + continue; + // If VR's cell contains a reference to VR, then VR cannot be defined + // via "insert". If VR is a constant that can be generated in a single + // instruction (without constant extenders), generating it via insert + // makes no sense. 
+ if (findSelfReference(VR) || isSmallConstant(VR)) + continue; + + findRecordInsertForms(VR, AVs); + } + } + + // Insert the defined registers into the list of available registers + // after they have been processed. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) + AVs.insert(VR); + BlockDefs.insert(InsDefs); + } + + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef GTN::ChildIteratorType ChildIter; + for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + collectInBlock(SB, AVs); + } + + for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) + AVs.remove(VR); +} + + +void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const { + // For a given register VR and a insert form, find the registers that are + // used by the current definition of VR, and which would no longer be + // needed for it after the definition of VR is replaced with the insert + // form. These are the registers that could potentially become dead. + RegisterSet Regs[2]; + + unsigned S = 0; // Register set selector. + Regs[S].insert(VR); + + while (!Regs[S].empty()) { + // Breadth-first search. + unsigned OtherS = 1-S; + Regs[OtherS].clear(); + for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { + Regs[S].remove(R); + if (R == IF.SrcR || R == IF.InsR) + continue; + // Check if a given register has bits that are references to any other + // registers. This is to detect situations where the instruction that + // defines register R takes register Q as an operand, but R itself does + // not contain any bits from Q. Loads are examples of how this could + // happen: + // R = load Q + // In this case (assuming we do not have any knowledge about the loaded + // value), we must not treat R as a "conveyance" of the bits from Q. + // (The information in BT about R's bits would have them as constants, + // in case of zero-extending loads, or refs to R.) + if (!findNonSelfReference(R)) + continue; + RMs.insert(R); + const MachineInstr *DefI = MRI->getVRegDef(R); + assert(DefI); + // Do not iterate past PHI nodes to avoid infinite loops. This can + // make the final set a bit less accurate, but the removable register + // sets are an approximation anyway. + if (DefI->isPHI()) + continue; + getInstrUses(DefI, Regs[OtherS]); + } + S = OtherS; + } + // The register VR is added to the list as a side-effect of the algorithm, + // but it is not "potentially removable". A potentially removable register + // is one that may become unused (dead) after conversion to the insert form + // IF, and obviously VR (or its replacement) will not become dead by apply- + // ing IF. + RMs.remove(VR); +} + + +void HexagonGenInsert::computeRemovableRegisters() { + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + findRemovableRegisters(I->first, LL[i].first, LL[i].second); + } +} + + +void HexagonGenInsert::pruneEmptyLists() { + // Remove all entries from the map, where the register has no insert forms + // associated with it. 
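+  // The iterators to erase are collected first and only erased afterwards;
+  // erasing from a DenseMap does not invalidate iterators to the remaining
+  // entries, so the collected iterators stay valid.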
+ typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Prune; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + if (I->second.size() == 0) + Prune.push_back(I); + } + for (unsigned i = 0, n = Prune.size(); i < n; ++i) + IFMap.erase(Prune[i]); +} + + +void HexagonGenInsert::pruneCoveredSets(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + // First, examine the IF candidates for register VR whose removable-regis- + // ter sets are empty. This means that a given candidate will not help eli- + // minate any registers, but since "insert" is not a constant-extendable + // instruction, using such a candidate may reduce code size if the defini- + // tion of VR is constant-extended. + // If there exists a candidate with a non-empty set, the ones with empty + // sets will not be used and can be removed. + MachineInstr *DefVR = MRI->getVRegDef(VR); + bool DefEx = HII->isConstExtended(DefVR); + bool HasNE = false; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + if (LL[i].second.empty()) + continue; + HasNE = true; + break; + } + if (!DefEx || HasNE) { + // The definition of VR is not constant-extended, or there is a candidate + // with a non-empty set. Remove all candidates with empty sets. + auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.second.empty(); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); + if (End != LL.end()) + LL.erase(End, LL.end()); + } else { + // The definition of VR is constant-extended, and all candidates have + // empty removable-register sets. Pick the maximum candidate, and remove + // all others. The "maximum" does not have any special meaning here, it + // is only so that the candidate that will remain on the list is selec- + // ted deterministically. + IFRecord MaxIF = LL[0].first; + for (unsigned i = 1, n = LL.size(); i < n; ++i) { + // If LL[MaxI] < LL[i], then MaxI = i. + const IFRecord &IF = LL[i].first; + unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; + unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; + if (M0 > R0) + continue; + if (M0 == R0) { + if (M1 > R1) + continue; + if (M1 == R1) { + if (MaxIF.Wdh > IF.Wdh) + continue; + if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) + continue; + } + } + // MaxIF < IF. + MaxIF = IF; + } + // Remove everything except the maximum candidate. All register sets + // are empty, so no need to preserve anything. + LL.clear(); + LL.push_back(std::make_pair(MaxIF, RegisterSet())); + } + + // Now, remove those whose sets of potentially removable registers are + // contained in another IF candidate for VR. For example, given these + // candidates for vreg45, + // %vreg45: + // (%vreg44,%vreg41,#9,#8), { %vreg42 } + // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } + // remove the first one, since it is contained in the second one. + for (unsigned i = 0, n = LL.size(); i < n; ) { + const RegisterSet &RMi = LL[i].second; + unsigned j = 0; + while (j < n) { + if (j != i && LL[j].second.includes(RMi)) + break; + j++; + } + if (j == n) { // RMi not contained in anything else. 
+ i++; + continue; + } + LL.erase(LL.begin()+i); + n = LL.size(); + } +} + + +void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, + PairMapType &M) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + unsigned Cutoff = VRegDistCutoff; + const MachineInstr *DefV = MRI->getVRegDef(VR); + + for (unsigned i = LL.size(); i > 0; --i) { + unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; + const MachineInstr *DefS = MRI->getVRegDef(SR); + const MachineInstr *DefI = MRI->getVRegDef(IR); + unsigned DSV = distance(DefS, DefV, RPO, M); + if (DSV < Cutoff) { + unsigned DIV = distance(DefI, DefV, RPO, M); + if (DIV < Cutoff) + continue; + } + LL.erase(LL.begin()+(i-1)); + } +} + + +void HexagonGenInsert::pruneRegCopies(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); + if (End != LL.end()) + LL.erase(End, LL.end()); +} + + +void HexagonGenInsert::pruneCandidates() { + // Remove candidates that are not beneficial, regardless of the final + // selection method. + // First, remove candidates whose potentially removable set is a subset + // of another candidate's set. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneCoveredSets(I->first); + + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(MFN); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + PairMapType Memo; // Memoization map for distance calculation. + // Remove candidates that would use registers defined too far away. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneUsesTooFar(I->first, RPO, Memo); + + pruneEmptyLists(); + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneRegCopies(I->first); +} + + +namespace { + // Class for comparing IF candidates for registers that have multiple of + // them. The smaller the candidate, according to this ordering, the better. + // First, compare the number of zeros in the associated potentially remova- + // ble register sets. "Zero" indicates that the register is very likely to + // become dead after this transformation. + // Second, compare "averages", i.e. use-count per size. The lower wins. + // After that, it does not really matter which one is smaller. Resolve + // the tie in some deterministic way. + struct IFOrdering { + IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) + : UseC(UC), BaseOrd(BO) {} + bool operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const; + private: + void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const; + const UnsignedMap &UseC; + const RegisterOrdering &BaseOrd; + }; +} + + +bool IFOrdering::operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const { + unsigned SizeA = 0, ZeroA = 0, SumA = 0; + unsigned SizeB = 0, ZeroB = 0, SumB = 0; + stats(A.second, SizeA, ZeroA, SumA); + stats(B.second, SizeB, ZeroB, SumB); + + // We will pick the minimum element. The more zeros, the better. + if (ZeroA != ZeroB) + return ZeroA > ZeroB; + // Compare SumA/SizeA with SumB/SizeB, lower is better. 
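+  // Cross-multiplying keeps the comparison exact and avoids division: for
+  // positive sizes, SumA/SizeA < SumB/SizeB holds exactly when
+  // SumA*SizeB < SumB*SizeA (e.g. 3/2 vs 5/4 compares as 12 vs 10), and an
+  // empty set (size 0, sum 0) simply makes both products zero.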
+ uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; + if (AvgA != AvgB) + return AvgA < AvgB; + + // The sets compare identical so far. Resort to comparing the IF records. + // The actual values don't matter, this is only for determinism. + unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; + if (OSA != OSB) + return OSA < OSB; + unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; + if (OIA != OIB) + return OIA < OIB; + if (A.first.Wdh != B.first.Wdh) + return A.first.Wdh < B.first.Wdh; + return A.first.Off < B.first.Off; +} + + +void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const { + for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { + UnsignedMap::const_iterator F = UseC.find(R); + assert(F != UseC.end()); + unsigned UC = F->second; + if (UC == 0) + Zero++; + Sum += UC; + Size++; + } +} + + +void HexagonGenInsert::selectCandidates() { + // Some registers may have multiple valid candidates. Pick the best one + // (or decide not to use any). + + // Compute the "removability" measure of R: + // For each potentially removable register R, record the number of regis- + // ters with IF candidates, where R appears in at least one set. + RegisterSet AllRMs; + UnsignedMap UseC, RemC; + IFMapType::iterator End = IFMap.end(); + + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + RegisterSet TT; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + TT.insert(LL[i].second); + for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) + RemC[R]++; + AllRMs.insert(TT); + } + + for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + typedef SmallSet<const MachineInstr*,16> InstrSet; + InstrSet UIs; + // Count as the number of instructions in which R is used, not the + // number of operands. + use_iterator E = MRI->use_nodbg_end(); + for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) + UIs.insert(I->getParent()); + unsigned C = UIs.size(); + // Calculate a measure, which is the number of instructions using R, + // minus the "removability" count computed earlier. + unsigned D = RemC[R]; + UseC[R] = (C > D) ? C-D : 0; // doz + } + + + bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; + if (!SelectAll0 && !SelectHas0) + SelectAll0 = true; + + // The smaller the number UseC for a given register R, the "less used" + // R is aside from the opportunities for removal offered by generating + // "insert" instructions. + // Iterate over the IF map, and for those registers that have multiple + // candidates, pick the minimum one according to IFOrdering. + IFOrdering IFO(UseC, BaseOrd); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.empty()) + continue; + // Get the minimum element, remember it and clear the list. If the + // element found is adequate, we will put it back on the list, other- + // wise the list will remain empty, and the entry for this register + // will be removed (i.e. this register will not be replaced by insert). + IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); + assert(MinI != LL.end()); + IFRecordWithRegSet M = *MinI; + LL.clear(); + + // We want to make sure that this replacement will have a chance to be + // beneficial, and that means that we want to have indication that some + // register will be removed. 
The most likely registers to be eliminated + // are the use operands in the definition of I->first. Accept/reject a + // candidate based on how many of its uses it can potentially eliminate. + + RegisterSet Us; + const MachineInstr *DefI = MRI->getVRegDef(I->first); + getInstrUses(DefI, Us); + bool Accept = false; + + if (SelectAll0) { + bool All0 = true; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] == 0) + continue; + All0 = false; + break; + } + Accept = All0; + } else if (SelectHas0) { + bool Has0 = false; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] != 0) + continue; + Has0 = true; + break; + } + Accept = Has0; + } + if (Accept) + LL.push_back(M); + } + + // Remove candidates that add uses of removable registers, unless the + // removable registers are among replacement candidates. + // Recompute the removable registers, since some candidates may have + // been eliminated. + AllRMs.clear(); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + if (LL.size() > 0) + AllRMs.insert(LL[0].second); + } + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.size() == 0) + continue; + unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; + if (AllRMs[SR] || AllRMs[IR]) + LL.clear(); + } + + pruneEmptyLists(); +} + + +bool HexagonGenInsert::generateInserts() { + // Create a new register for each one from IFMap, and store them in the + // map. + UnsignedMap RegMap; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned VR = I->first; + const TargetRegisterClass *RC = MRI->getRegClass(VR); + unsigned NewVR = MRI->createVirtualRegister(RC); + RegMap[VR] = NewVR; + } + + // We can generate the "insert" instructions using potentially stale re- + // gisters: SrcR and InsR for a given VR may be among other registers that + // are also replaced. This is fine, we will do the mass "rauw" a bit later. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *MI = MRI->getVRegDef(I->first); + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned NewR = RegMap[I->first]; + bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; + const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) + : HII->get(Hexagon::S2_insertp); + IFRecord IF = I->second[0].first; + unsigned Wdh = IF.Wdh, Off = IF.Off; + unsigned InsS = 0; + if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { + InsS = Hexagon::subreg_loreg; + if (Off >= 32) { + InsS = Hexagon::subreg_hireg; + Off -= 32; + } + } + // Advance to the proper location for inserting instructions. This could + // be B.end(). 
+ MachineBasicBlock::iterator At = MI; + if (MI->isPHI()) + At = B.getFirstNonPHI(); + + BuildMI(B, At, DL, D, NewR) + .addReg(IF.SrcR) + .addReg(IF.InsR, 0, InsS) + .addImm(Wdh) + .addImm(Off); + + MRI->clearKillFlags(IF.SrcR); + MRI->clearKillFlags(IF.InsR); + } + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *DefI = MRI->getVRegDef(I->first); + MRI->replaceRegWith(I->first, RegMap[I->first]); + DefI->eraseFromParent(); + } + + return true; +} + + +bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= removeDeadCode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + MachineInstr *MI = *I; + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { + if (!Op->isReg() || !Op->isDef()) + continue; + unsigned R = Op->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || + !MRI->use_nodbg_empty(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned I = 0, N = Regs.size(); I != N; ++I) + MRI->markUsesInDebugValueAsUndef(Regs[I]); + Changed = true; + } + + return Changed; +} + + +bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { + bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; + bool Changed = false; + TimerGroup __G("hexinsert"); + NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); + + // Sanity check: one, but not both. + assert(!OptSelectAll0 || !OptSelectHas0); + + IFMap.clear(); + BaseOrd.clear(); + CellOrd.clear(); + + const auto &ST = MF.getSubtarget<HexagonSubtarget>(); + HII = ST.getInstrInfo(); + HRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + + // Clean up before any further processing, so that dead code does not + // get used in a newly generated "insert" instruction. Have a custom + // version of DCE that preserves lifetime markers. Without it, merging + // of stack objects can fail to recognize and merge disjoint objects + // leading to unnecessary stack growth. + Changed |= removeDeadCode(MDT->getRootNode()); + + const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); + BitTracker BTLoc(HE, MF); + BTLoc.trace(isDebug()); + BTLoc.run(); + CellMapShadow MS(BTLoc); + CMS = &MS; + + buildOrderingMF(BaseOrd); + buildOrderingBT(BaseOrd, CellOrd); + + if (isDebug()) { + dbgs() << "Cell ordering:\n"; + for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); + I != E; ++I) { + unsigned VR = I->first, Pos = I->second; + dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n"; + } + } + + // Collect candidates for conversion into the insert forms. 
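+  // The collection is a depth-first walk over the dominator tree, so when a
+  // block is visited, the available-register list AvailR only contains
+  // registers whose definitions dominate the current point, which is what
+  // allows them to appear as SrcR/InsR in an insert form.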
+ MachineBasicBlock *RootB = MDT->getRoot(); + OrderedRegisterList AvailR(CellOrd); + + { + NamedRegionTimer _T("collection", "hexinsert", TimingDetail); + collectInBlock(RootB, AvailR); + // Complete the information gathered in IFMap. + computeRemovableRegisters(); + } + + if (isDebug()) { + dbgs() << "Candidates after collection:\n"; + dump_map(); + } + + if (IFMap.empty()) + return false; + + { + NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); + pruneCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after pruning:\n"; + dump_map(); + } + + if (IFMap.empty()) + return false; + + { + NamedRegionTimer _T("selection", "hexinsert", TimingDetail); + selectCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after selection:\n"; + dump_map(); + } + + // Filter out vregs beyond the cutoff. + if (VRegIndexCutoff.getPosition()) { + unsigned Cutoff = VRegIndexCutoff; + typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Out; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); + if (Idx >= Cutoff) + Out.push_back(I); + } + for (unsigned i = 0, n = Out.size(); i < n; ++i) + IFMap.erase(Out[i]); + } + + { + NamedRegionTimer _T("generation", "hexinsert", TimingDetail); + Changed = generateInserts(); + } + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenInsert() { + return new HexagonGenInsert(); +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp new file mode 100644 index 000000000000..6905c4f6d125 --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -0,0 +1,525 @@ +//===--- HexagonGenPredicate.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "gen-pred" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <queue> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + void initializeHexagonGenPredicatePass(PassRegistry& Registry); + FunctionPass *createHexagonGenPredicate(); +} + +namespace { + struct Register { + unsigned R, S; + Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {} + Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {} + bool operator== (const Register &Reg) const { + return R == Reg.R && S == Reg.S; + } + bool operator< (const Register &Reg) const { + return R < Reg.R || (R == Reg.R && S < Reg.S); + } + }; + struct PrintRegister { + PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR); + private: + Register Reg; + const TargetRegisterInfo &TRI; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { + return OS << PrintReg(PR.Reg.R, &PR.TRI, PR.Reg.S); + } + + class HexagonGenPredicate : public MachineFunctionPass { + public: + static char ID; + HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) { + initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate predicate operations"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SetVector<MachineInstr*> VectOfInst; + typedef std::set<Register> SetOfReg; + typedef std::map<Register,Register> RegToRegMap; + + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + MachineRegisterInfo *MRI; + SetOfReg PredGPRs; + VectOfInst PUsers; + RegToRegMap G2P; + + bool isPredReg(unsigned R); + void collectPredicateGPR(MachineFunction &MF); + void processPredicateGPR(const Register &Reg); + unsigned getPredForm(unsigned Opc); + bool isConvertibleToPredForm(const MachineInstr *MI); + bool isScalarCmp(unsigned Opc); + bool isScalarPred(Register PredReg); + Register getPredRegFor(const Register &Reg); + bool convertToPredForm(MachineInstr *MI); + bool eliminatePredCopies(MachineFunction &MF); + }; + + char HexagonGenPredicate::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) + +bool HexagonGenPredicate::isPredReg(unsigned R) { + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; + const TargetRegisterClass *RC = 
MRI->getRegClass(R); + return RC == &Hexagon::PredRegsRegClass; +} + + +unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { + using namespace Hexagon; + + switch (Opc) { + case A2_and: + case A2_andp: + return C2_and; + case A4_andn: + case A4_andnp: + return C2_andn; + case M4_and_and: + return C4_and_and; + case M4_and_andn: + return C4_and_andn; + case M4_and_or: + return C4_and_or; + + case A2_or: + case A2_orp: + return C2_or; + case A4_orn: + case A4_ornp: + return C2_orn; + case M4_or_and: + return C4_or_and; + case M4_or_andn: + return C4_or_andn; + case M4_or_or: + return C4_or_or; + + case A2_xor: + case A2_xorp: + return C2_xor; + + case C2_tfrrp: + return COPY; + } + // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here + // to denote "none", but we need to make sure that none of the valid opcodes + // that we return will ever be 0. + assert(PHI == 0 && "Use different value for <none>"); + return 0; +} + + +bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (getPredForm(Opc) != 0) + return true; + + // Comparisons against 0 are also convertible. This does not apply to + // A4_rcmpeqi or A4_rcmpneqi, since they produce values 0 or 1, which + // may not match the value that the predicate register would have if + // it was converted to a predicate form. + switch (Opc) { + case Hexagon::C2_cmpeqi: + case Hexagon::C4_cmpneqi: + if (MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return true; + break; + } + return false; +} + + +void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_tfrpr: + case TargetOpcode::COPY: + if (isPredReg(MI->getOperand(1).getReg())) { + Register RD = MI->getOperand(0); + if (TargetRegisterInfo::isVirtualRegister(RD.R)) + PredGPRs.insert(RD); + } + break; + } + } + } +} + + +void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " + << PrintReg(Reg.R, TRI, Reg.S) << "\n"); + typedef MachineRegisterInfo::use_iterator use_iterator; + use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end(); + if (I == E) { + DEBUG(dbgs() << "Dead reg: " << PrintReg(Reg.R, TRI, Reg.S) << '\n'); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + DefI->eraseFromParent(); + return; + } + + for (; I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (isConvertibleToPredForm(UseI)) + PUsers.insert(UseI); + } +} + + +Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { + // Create a predicate register for a given Reg. The newly created register + // will have its value copied from Reg, so that it can be later used as + // an operand in other instructions. 
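+  // If Reg is defined by a transfer from a predicate register (C2_tfrpr or
+  // a plain COPY), reuse that predicate register directly. Otherwise, if the
+  // defining instruction is convertible to a predicate form, create a new
+  // predicate virtual register and insert a COPY from Reg right after the
+  // definition.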
+ assert(TargetRegisterInfo::isVirtualRegister(Reg.R)); + RegToRegMap::iterator F = G2P.find(Reg); + if (F != G2P.end()) + return F->second; + + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI)); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + assert(DefI); + unsigned Opc = DefI->getOpcode(); + if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) { + assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse()); + Register PR = DefI->getOperand(1); + G2P.insert(std::make_pair(Reg, PR)); + DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n'); + return PR; + } + + MachineBasicBlock &B = *DefI->getParent(); + DebugLoc DL = DefI->getDebugLoc(); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + unsigned NewPR = MRI->createVirtualRegister(PredRC); + + // For convertible instructions, do not modify them, so that they can + // be coverted later. Generate a copy from Reg to NewPR. + if (isConvertibleToPredForm(DefI)) { + MachineBasicBlock::iterator DefIt = DefI; + BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR) + .addReg(Reg.R, 0, Reg.S); + G2P.insert(std::make_pair(Reg, Register(NewPR))); + DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n'); + return Register(NewPR); + } + + llvm_unreachable("Invalid argument"); +} + + +bool HexagonGenPredicate::isScalarCmp(unsigned Opc) { + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgei: + case Hexagon::C2_cmpgeui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmpltei: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + return true; + } + return false; +} + + +bool HexagonGenPredicate::isScalarPred(Register PredReg) { + std::queue<Register> WorkQ; + WorkQ.push(PredReg); + + while (!WorkQ.empty()) { + Register PR = WorkQ.front(); + WorkQ.pop(); + const MachineInstr *DefI = MRI->getVRegDef(PR.R); + if (!DefI) + return false; + unsigned DefOpc = DefI->getOpcode(); + switch (DefOpc) { + case TargetOpcode::COPY: { + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + if (MRI->getRegClass(PR.R) != PredRC) + return false; + // If it is a copy between two predicate registers, fall through. + } + case Hexagon::C2_and: + case Hexagon::C2_andn: + case Hexagon::C4_and_and: + case Hexagon::C4_and_andn: + case Hexagon::C4_and_or: + case Hexagon::C2_or: + case Hexagon::C2_orn: + case Hexagon::C4_or_and: + case Hexagon::C4_or_andn: + case Hexagon::C4_or_or: + case Hexagon::C4_or_orn: + case Hexagon::C2_xor: + // Add operands to the queue. + for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo) + if (Mo->isReg() && Mo->isUse()) + WorkQ.push(Register(Mo->getReg())); + break; + + // All non-vector compares are ok, everything else is bad. 
+ default: + return isScalarCmp(DefOpc); + } + } + + return true; +} + + +bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI); + + unsigned Opc = MI->getOpcode(); + assert(isConvertibleToPredForm(MI)); + unsigned NumOps = MI->getNumOperands(); + for (unsigned i = 0; i < NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + Register Reg(MO); + if (Reg.S && Reg.S != Hexagon::subreg_loreg) + return false; + if (!PredGPRs.count(Reg)) + return false; + } + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned NewOpc = getPredForm(Opc); + // Special case for comparisons against 0. + if (NewOpc == 0) { + switch (Opc) { + case Hexagon::C2_cmpeqi: + NewOpc = Hexagon::C2_not; + break; + case Hexagon::C4_cmpneqi: + NewOpc = TargetOpcode::COPY; + break; + default: + return false; + } + + // If it's a scalar predicate register, then all bits in it are + // the same. Otherwise, to determine whether all bits are 0 or not + // we would need to use any8. + Register PR = getPredRegFor(MI->getOperand(1)); + if (!isScalarPred(PR)) + return false; + // This will skip the immediate argument when creating the predicate + // version instruction. + NumOps = 2; + } + + // Some sanity: check that def is in operand #0. + MachineOperand &Op0 = MI->getOperand(0); + assert(Op0.isDef()); + Register OutR(Op0); + + // Don't use getPredRegFor, since it will create an association between + // the argument and a created predicate register (i.e. it will insert a + // copy if a new predicate register is created). + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + Register NewPR = MRI->createVirtualRegister(PredRC); + MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R); + + // Add predicate counterparts of the GPRs. + for (unsigned i = 1; i < NumOps; ++i) { + Register GPR = MI->getOperand(i); + Register Pred = getPredRegFor(GPR); + MIB.addReg(Pred.R, 0, Pred.S); + } + DEBUG(dbgs() << "generated: " << *MIB); + + // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR + // with NewGPR. + const TargetRegisterClass *RC = MRI->getRegClass(OutR.R); + unsigned NewOutR = MRI->createVirtualRegister(RC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR) + .addReg(NewPR.R, 0, NewPR.S); + MRI->replaceRegWith(OutR.R, NewOutR); + MI->eraseFromParent(); + + // If the processed instruction was C2_tfrrp (i.e. Rn = Pm; Pk = Rn), + // then the output will be a predicate register. Do not visit the + // users of it. + if (!isPredReg(NewOutR)) { + Register R(NewOutR); + PredGPRs.insert(R); + processPredicateGPR(R); + } + return true; +} + + +bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n"); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + bool Changed = false; + VectOfInst Erase; + + // First, replace copies + // IntR = PredR1 + // PredR2 = IntR + // with + // PredR2 = PredR1 + // Such sequences can be generated when a copy-into-pred is generated from + // a gpr register holding a result of a convertible instruction. After + // the convertible instruction is converted, its predicate result will be + // copied back into the original gpr. 
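+  // Only copies between two virtual registers of the predicate register
+  // class (and without subregisters) are rewritten: the destination is
+  // replaced with the source everywhere, and the now-redundant copy is
+  // erased afterwards.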
+ + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + if (I->getOpcode() != TargetOpcode::COPY) + continue; + Register DR = I->getOperand(0); + Register SR = I->getOperand(1); + if (!TargetRegisterInfo::isVirtualRegister(DR.R)) + continue; + if (!TargetRegisterInfo::isVirtualRegister(SR.R)) + continue; + if (MRI->getRegClass(DR.R) != PredRC) + continue; + if (MRI->getRegClass(SR.R) != PredRC) + continue; + assert(!DR.S && !SR.S && "Unexpected subregister"); + MRI->replaceRegWith(DR.R, SR.R); + Erase.insert(I); + Changed = true; + } + } + + for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return Changed; +} + + +bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + TRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + PredGPRs.clear(); + PUsers.clear(); + G2P.clear(); + + bool Changed = false; + collectPredicateGPR(MF); + for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I) + processPredicateGPR(*I); + + bool Again; + do { + Again = false; + VectOfInst Processed, Copy; + + typedef VectOfInst::iterator iterator; + Copy = PUsers; + for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) { + MachineInstr *MI = *I; + bool Done = convertToPredForm(MI); + if (Done) { + Processed.insert(MI); + Again = true; + } + } + Changed |= Again; + + auto Done = [Processed] (MachineInstr *MI) -> bool { + return Processed.count(MI); + }; + PUsers.remove_if(Done); + } while (Again); + + Changed |= eliminatePredCopies(MF); + return Changed; +} + + +FunctionPass *llvm::createHexagonGenPredicate() { + return new HexagonGenPredicate(); +} + diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 6e9e69f5a2c7..c739afb70c15 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -459,6 +459,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); // Check for varargs. int NumNamedVarArgParams = -1; @@ -515,8 +516,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<SDValue, 8> MemOpChains; auto &HRI = *Subtarget.getRegisterInfo(); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), - getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT); // Walk the register/memloc assignments, inserting copies/loads. 
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -574,7 +575,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); if (!isTailCall) { - SDValue C = DAG.getConstant(NumBytes, dl, getPointerTy(), true); + SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true); Chain = DAG.getCALLSEQ_START(Chain, C, dl); } @@ -615,13 +616,13 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (flag_aligned_memcpy) { const char *MemcpyName = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; - Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy()); + Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT); flag_aligned_memcpy = false; } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); } // Returns a chain & a flag for retval copy to use. @@ -811,8 +812,8 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); } - SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl, - getPointerTy(), TargetJT); + SDValue JumpTableBase = DAG.getNode( + HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT); SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, DAG.getConstant(2, dl, MVT::i32)); SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase, @@ -1231,16 +1232,17 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); SDLoc dl(Op); - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); const HexagonTargetObjectFile *TLOF = static_cast<const HexagonTargetObjectFile *>( getTargetMachine().getObjFileLowering()); if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) { - return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result); } - return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); + return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result); } // Specifies that for loads and stores VT can be promoted to PromotedLdStVT. @@ -1261,7 +1263,8 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32); SDLoc dl(Op); - return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD); + return DAG.getNode(HexagonISD::CONST32_GP, dl, + getPointerTy(DAG.getDataLayout()), BA_SD); } //===----------------------------------------------------------------------===// @@ -2254,6 +2257,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Mark function as containing a call to EH_RETURN. 
HexagonMachineFunctionInfo *FuncInfo = @@ -2262,9 +2266,9 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { unsigned OffsetReg = Hexagon::R28; - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getRegister(Hexagon::R30, getPointerTy()), - DAG.getIntPtrConstant(4, dl)); + SDValue StoreAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT), + DAG.getIntPtrConstant(4, dl)); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); @@ -2338,8 +2342,7 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, std::pair<unsigned, const TargetRegisterClass *> HexagonTargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 @@ -2372,8 +2375,8 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. -bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Allows a signed-extended 11-bit immediate field. if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) @@ -2463,3 +2466,45 @@ bool llvm::isPositiveHalfWord(SDNode *N) { return true; } } + +Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked + : Intrinsic::hexagon_L4_loadd_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + return Builder.CreateCall(Fn, Addr, "larx"); +} + +/// Perform a store-conditional operation to Addr. Return the status of the +/// store. This should be 0 if the store succeeded, non-zero otherwise. +Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, + Value *Val, Value *Addr, AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = Val->getType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked + : Intrinsic::hexagon_S4_stored_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx"); + Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), ""); + Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); + return Ext; +} + +bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + // Do not expand loads and stores that don't exceed 64 bits. + return LI->getType()->getPrimitiveSizeInBits() > 64; +} + +bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + // Do not expand loads and stores that don't exceed 64 bits. 
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; +} + diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index b80e8477eb7b..2642abffaddd 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -165,7 +165,8 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - EVT getSetCCResultType(LLVMContext &C, EVT VT) const override { + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, + EVT VT) const override { if (!VT.isVector()) return MVT::i1; else @@ -179,11 +180,10 @@ bool isPositiveHalfWord(SDNode *N); std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "o") return InlineAsm::Constraint_o; else if (ConstraintCode == "v") @@ -198,8 +198,8 @@ bool isPositiveHalfWord(SDNode *N); /// The type may be VoidTy, in which case only return true if the addressing /// mode is legal for a load/store of any legal type. /// TODO: Handle pre/postinc as well. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -207,6 +207,21 @@ bool isPositiveHalfWord(SDNode *N); /// compare a register against the immediate without having to materialize /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; + + // Handling of atomic RMW instructions. 
+ bool hasLoadLinkedStoreConditional() const override { + return true; + } + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) + const override { + return AtomicRMWExpansionKind::LLSC; + } }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 8f255a08f534..f6bb4a045438 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -221,7 +221,7 @@ unsigned HexagonRegisterInfo::getRARegister() const { unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const HexagonFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) return Hexagon::R30; return Hexagon::R29; @@ -240,7 +240,8 @@ unsigned HexagonRegisterInfo::getStackRegister() const { bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - return MF.getSubtarget().getFrameLowering()->hasFP(MF); + const HexagonFrameLowering *TFI = getFrameLowering(MF); + return TFI->hasFP(MF); } diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index b5db997eb1b8..276cc69eed0f 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; bool llvm::flag_aligned_memcpy; -HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() { -} - SDValue HexagonSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 8ac2e43f9294..80ac5d7bd9e2 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -20,8 +20,6 @@ namespace llvm { class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit HexagonSelectionDAGInfo(const DataLayout &DL); - ~HexagonSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index fe6c4f4298b5..cd482b3e3af1 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -74,7 +74,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()), FrameLowering() { + FrameLowering() { // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index a173a8087832..b50442969a29 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -37,6 +37,18 @@ static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Early expansion of MUX")); +static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true), + cl::Hidden, cl::desc("Generate \"insert\" instructions")); + +static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); + +static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true), + cl::Hidden, cl::desc("Generate \"extract\" instructions")); + +static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), + cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " + "predicate instructions")); /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the @@ -60,23 +72,23 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { - FunctionPass *createHexagonExpandCondsets(); - FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, - CodeGenOpt::Level OptLevel); - FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); - FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); FunctionPass *createHexagonCFGOptimizer(); - - FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonExpandCondsets(); FunctionPass *createHexagonExpandPredSpillCode(); - FunctionPass *createHexagonHardwareLoops(); - FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonGenExtract(); + FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenPredicate(); + FunctionPass *createHexagonHardwareLoops(); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createHexagonNewValueJump(); - FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); - FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitConst32AndConst64(); } // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. @@ -122,6 +134,7 @@ public: return createVLIWMachineSched(C); } + void addIRPasses() override; bool addInstSelector() override; void addPreRegAlloc() override; void addPostRegAlloc() override; @@ -134,6 +147,20 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { return new HexagonPassConfig(this, PM); } +void HexagonPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + addPass(createAtomicExpandPass(TM)); + if (!NoOpt) { + if (EnableCommGEP) + addPass(createHexagonCommonGEP()); + // Replace certain combinations of shifts and ands with extracts. 
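+    // This is done by the HexagonGenExtract pass (guarded by the
+    // -hexagon-extract option).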
+ if (EnableGenExtract) + addPass(createHexagonGenExtract()); + } +} + bool HexagonPassConfig::addInstSelector() { HexagonTargetMachine &TM = getHexagonTargetMachine(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); @@ -144,8 +171,13 @@ bool HexagonPassConfig::addInstSelector() { addPass(createHexagonISelDag(TM, getOptLevel())); if (!NoOpt) { + // Create logical operations on predicate registers. + if (EnableGenPred) + addPass(createHexagonGenPredicate(), false); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); + if (EnableGenInsert) + addPass(createHexagonGenInsert(), false); } return false; diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt index 8259055b3f41..9d288af0214a 100644 --- a/lib/Target/Hexagon/LLVMBuild.txt +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -39,4 +39,5 @@ required_libraries = SelectionDAG Support Target + TransformUtils add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 83ce0abd835e..53305d85fd80 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -46,7 +46,7 @@ MCInstrInfo *llvm::createHexagonMCInstrInfo() { return X; } -static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitHexagonMCRegisterInfo(X, Hexagon::R0); return X; @@ -54,9 +54,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitHexagonMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createHexagonMCSubtargetInfoImpl(TT, CPU, FS); } namespace { @@ -151,7 +149,8 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index a8f9b52746ad..3f377631c016 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -18,7 +18,6 @@ add_llvm_target(MSP430CodeGen MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp - MSP430SelectionDAGInfo.cpp MSP430AsmPrinter.cpp MSP430MCInstLower.cpp ) diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index be445c56389a..807d1129b5fc 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -37,7 +37,7 @@ static MCInstrInfo *createMSP430MCInstrInfo() { return X; } -static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createMSP430MCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitMSP430MCRegisterInfo(X, MSP430::PC); return X; @@ -45,12 +45,11 @@ static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitMSP430MCSubtargetInfo(X, TT, CPU, FS); - return X; + return 
createMSP430MCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createMSP430MCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 5ce5013d898c..8a01334ee2dd 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -254,10 +254,11 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N, AM.Base.Reg = CurDAG->getRegister(0, VT); } - Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, - getTargetLowering()->getPointerTy()) : - AM.Base.Reg; + Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) + ? CurDAG->getTargetFrameIndex( + AM.Base.FrameIndex, + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + : AM.Base.Reg; if (AM.GV) Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(N), diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index bc51741a836f..29bc8b33988a 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -213,7 +213,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. TargetLowering::ConstraintType -MSP430TargetLowering::getConstraintType(const std::string &Constraint) const { +MSP430TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -227,8 +227,7 @@ MSP430TargetLowering::getConstraintType(const std::string &Constraint) const { std::pair<unsigned, const TargetRegisterClass *> MSP430TargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -494,7 +493,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, if (Flags.isByVal()) { int FI = MFI->CreateFixedObject(Flags.getByValSize(), VA.getLocMemOffset(), true); - InVal = DAG.getFrameIndex(FI, getPointerTy()); + InVal = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; @@ -592,10 +591,10 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = CCInfo.getNextStackOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, dl, - getPointerTy(), true), - dl); + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, dl, PtrVT, true), dl); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; SmallVector<SDValue, 12> MemOpChains; @@ -630,12 +629,11 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, assert(VA.isMemLoc()); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, PtrVT); - SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), - StackPtr, - DAG.getIntPtrConstant(VA.getLocMemOffset(), - dl)); + SDValue PtrOff = + DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), dl)); SDValue MemOp; ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -700,11 +698,8 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, dl, getPointerTy(), - true), - DAG.getConstant(0, dl, getPointerTy(), true), - InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, dl, PtrVT, true), + DAG.getConstant(0, dl, PtrVT, true), InFlag, dl); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we @@ -788,30 +783,31 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Create the TargetGlobalAddress node, folding in the constant offset. 
- SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), - getPointerTy(), Offset); - return DAG.getNode(MSP430ISD::Wrapper, SDLoc(Op), - getPointerTy(), Result); + SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), PtrVT, Offset); + return DAG.getNode(MSP430ISD::Wrapper, SDLoc(Op), PtrVT, Result); } SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); - SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT); - return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); + return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result); } SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT); - return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); + return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result); } static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, @@ -1024,16 +1020,17 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); + auto PtrVT = getPointerTy(MF.getDataLayout()); if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. - uint64_t SlotSize = getDataLayout()->getPointerSize(); + uint64_t SlotSize = MF.getDataLayout().getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, true); FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, PtrVT); } SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, @@ -1046,21 +1043,21 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(getDataLayout()->getPointerSize(), dl, MVT::i16); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), - FrameAddr, Offset), + DAG.getConstant(DAG.getDataLayout().getPointerSize(), dl, MVT::i16); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address. 
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo(), false, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, @@ -1084,10 +1081,11 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Frame index of first vararg argument - SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + SDValue FrameIndex = + DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // Create a store of the frame index to the location operand @@ -1099,9 +1097,9 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); - return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), - getPointerTy(), Result); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), PtrVT, Result); } /// getPostIndexedAddressParts - returns true by value, base pointer and diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 80d3ae175fb1..2d63852c185b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -72,7 +72,9 @@ namespace llvm { explicit MSP430TargetLowering(const TargetMachine &TM, const MSP430Subtarget &STI); - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i8; + } /// LowerOperation - Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -96,11 +98,10 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType - getConstraintType(const std::string &Constraint) const override; + getConstraintType(StringRef Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. 
On msp430 it's free to truncate a i16 value in diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 614467bcd248..2fb82e535e8d 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -37,7 +37,7 @@ MSP430RegisterInfo::MSP430RegisterInfo() const MCPhysReg* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + const MSP430FrameLowering *TFI = getFrameLowering(*MF); const Function* F = MF->getFunction(); static const MCPhysReg CalleeSavedRegs[] = { MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7, @@ -73,7 +73,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const MSP430FrameLowering *TFI = getFrameLowering(MF); // Mark 4 special registers with subregisters as reserved. Reserved.set(MSP430::PCB); @@ -109,7 +109,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const MSP430FrameLowering *TFI = getFrameLowering(MF); DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -156,7 +156,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - + const MSP430FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP; } diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp deleted file mode 100644 index 3897ef684d4d..000000000000 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MSP430SelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "MSP430TargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "msp430-selectiondag-info" - -MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { -} diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h deleted file mode 100644 index 61a6b19111db..000000000000 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the MSP430 subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class MSP430TargetMachine; - -class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit MSP430SelectionDAGInfo(const DataLayout &DL); - ~MSP430SelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 6374f41c00ea..6216348e4d71 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -34,5 +34,4 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : MSP430GenSubtargetInfo(TT, CPU, FS), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()) {} + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 81f6f027d45c..ff2656d26dd2 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -18,8 +18,8 @@ #include "MSP430ISelLowering.h" #include "MSP430InstrInfo.h" #include "MSP430RegisterInfo.h" -#include "MSP430SelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -35,7 +35,7 @@ class MSP430Subtarget : public MSP430GenSubtargetInfo { MSP430FrameLowering FrameLowering; MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; - MSP430SelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; public: /// This constructor initializes the data members to match that @@ -60,7 +60,7 @@ public: const MSP430TargetLowering *getTargetLowering() const override { return &TLInfo; } - const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } }; diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index f14156dbfa2b..5107d2ae58c3 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1727,37 +1727,59 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } namespace { -template <unsigned ShiftAmount> +void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(Imm); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions); +} + + +void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm, + SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(SrcReg)); + tmpInst.addOperand(Imm); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRRR(unsigned Opcode, unsigned DstReg, 
unsigned SrcReg, + unsigned SrcReg2, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc, + Instructions); +} + +void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm, + SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { + emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc, + Instructions); +} + +template <int16_t ShiftAmount> void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { - MCInst tmpInst; - if (ShiftAmount >= 32) { - tmpInst.setOpcode(Mips::DSLL32); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createImm(ShiftAmount - 32)); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); - tmpInst.clear(); - } else if (ShiftAmount > 0) { - tmpInst.setOpcode(Mips::DSLL); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createImm(ShiftAmount)); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); - tmpInst.clear(); - } + if (ShiftAmount >= 32) + emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions); + else if (ShiftAmount > 0) + emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions); + // There's no need for an ORi if the immediate is 0. if (Operand.isImm() && Operand.getImm() == 0) return; - tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(Operand); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); + emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions); } template <unsigned ShiftAmount> @@ -1818,12 +1840,22 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, return true; } + if (Is32BitImm) { + if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { + // Sign extend up to 64-bit so that the predicates match the hardware + // behaviour. In particular, isInt<16>(0xffff8000) and similar should be + // true. + ImmValue = SignExtend64<32>(ImmValue); + } else { + Error(IDLoc, "instruction requires a 32-bit immediate"); + return true; + } + } + bool UseSrcReg = false; if (SrcReg != Mips::NoRegister) UseSrcReg = true; - MCInst tmpInst; - unsigned TmpReg = DstReg; if (UseSrcReg && (DstReg == SrcReg)) { // At this point we need AT to perform the expansions and we exit if it is @@ -1834,29 +1866,26 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, TmpReg = ATReg; } - tmpInst.setLoc(IDLoc); // FIXME: gas has a special case for values that are 000...1111, which // becomes a li -1 and then a dsrl - if (0 <= ImmValue && ImmValue <= 65535) { - // For unsigned and positive signed 16-bit values (0 <= j <= 65535): - // li d,j => ori d,$zero,j - if (!UseSrcReg) - SrcReg = isGP64bit() ? 
Mips::ZERO_64 : Mips::ZERO; - tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(MCOperand::createReg(SrcReg)); - tmpInst.addOperand(MCOperand::createImm(ImmValue)); - Instructions.push_back(tmpInst); - } else if (ImmValue < 0 && ImmValue >= -32768) { - // For negative signed 16-bit values (-32768 <= j < 0): + if (isInt<16>(ImmValue)) { // li d,j => addiu d,$zero,j if (!UseSrcReg) SrcReg = Mips::ZERO; - tmpInst.setOpcode(Mips::ADDiu); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(MCOperand::createReg(SrcReg)); - tmpInst.addOperand(MCOperand::createImm(ImmValue)); - Instructions.push_back(tmpInst); + emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions); + } else if (isUInt<16>(ImmValue)) { + // li d,j => ori d,$zero,j + unsigned TmpReg = DstReg; + if (SrcReg == DstReg) { + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + TmpReg = ATReg; + } + + emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions); } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { warnIfNoMacro(IDLoc); @@ -1869,30 +1898,16 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, if (!Is32BitImm && !isInt<32>(ImmValue)) { // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the // upper 32 bits. - tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createReg(Mips::ZERO)); - tmpInst.addOperand(MCOperand::createImm(Bits31To16)); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); - // Move the value to the upper 16 bits by doing a 16-bit left shift. - createLShiftOri<16>(0, TmpReg, IDLoc, Instructions); - } else { - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createImm(Bits31To16)); - Instructions.push_back(tmpInst); - } + emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions); + emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions); + } else + emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions); createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions); if (UseSrcReg) createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } else if ((ImmValue & (0xffffLL << 48)) == 0) { - if (Is32BitImm) { - Error(IDLoc, "instruction requires a 32-bit immediate"); - return true; - } warnIfNoMacro(IDLoc); // <------- lo32 ------> @@ -1912,10 +1927,7 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff; uint16_t Bits15To0 = ImmValue & 0xffff; - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createImm(Bits47To32)); - Instructions.push_back(tmpInst); + emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions); createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions); createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); @@ -1923,10 +1935,6 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } else { - if (Is32BitImm) { - Error(IDLoc, "instruction requires a 32-bit immediate"); - return true; - } warnIfNoMacro(IDLoc); // <------- hi32 ------> <------- lo32 ------> @@ -1948,10 +1956,7 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, uint16_t Bits31To16 = (ImmValue >> 16) & 
0xffff; uint16_t Bits15To0 = ImmValue & 0xffff; - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createImm(Bits63To48)); - Instructions.push_back(tmpInst); + emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions); createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions); // When Bits31To16 is 0, do a left shift of 32 bits instead of doing @@ -2096,8 +2101,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress( tmpInst.addOperand(MCOperand::createExpr(HiExpr)); Instructions.push_back(tmpInst); - createLShiftOri<0>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(), - Instructions); + emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(), + Instructions); } if (UseSrcReg) @@ -2708,12 +2713,8 @@ void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, SmallVectorImpl<MCInst> &Instructions) { - MCInst AdduInst; - AdduInst.setOpcode(Is64Bit ? Mips::DADDu : Mips::ADDu); - AdduInst.addOperand(MCOperand::createReg(DstReg)); - AdduInst.addOperand(MCOperand::createReg(SrcReg)); - AdduInst.addOperand(MCOperand::createReg(TrgReg)); - Instructions.push_back(AdduInst); + emitRRR(Is64Bit ? Mips::DADDu : Mips::ADDu, DstReg, SrcReg, TrgReg, SMLoc(), + Instructions); } unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 36ba8e559e0b..bde843afd3d2 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -46,7 +46,6 @@ add_llvm_target(MipsCodeGen MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp - MipsSelectionDAGInfo.cpp ) add_subdirectory(InstPrinter) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 9bdf8235a2b4..949ee1474f96 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -59,7 +59,7 @@ static MCInstrInfo *createMipsMCInstrInfo() { return X; } -static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createMipsMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitMipsMCRegisterInfo(X, Mips::RA); return X; @@ -68,9 +68,7 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { CPU = MIPS_MC::selectMipsCPU(TT, CPU); - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitMipsMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createMipsMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, @@ -84,7 +82,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createMipsMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index db2a924a99f9..46cc99c62393 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -152,18 +152,19 @@ Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); } -void 
Mips16FrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void Mips16FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const Mips16InstrInfo &TII = *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo()); const MipsRegisterInfo &RI = TII.getRegisterInfo(); const BitVector Reserved = RI.getReservedRegs(MF); bool SaveS2 = Reserved[Mips::S2]; if (SaveS2) - MF.getRegInfo().setPhysRegUsed(Mips::S2); + SavedRegs.set(Mips::S2); if (hasFP(MF)) - MF.getRegInfo().setPhysRegUsed(Mips::S0); + SavedRegs.set(Mips::S0); } const MipsFrameLowering * diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index f281c927c1c4..b48ed4641ea7 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -38,8 +38,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; }; } // End llvm namespace diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 7b6a2a154471..bce2c1eb4485 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -120,13 +120,13 @@ void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { unsigned Mips16SPAliasReg = MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, - getTargetLowering()->getPointerTy()); + auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); + return CurDAG->getRegister(Mips16SPAliasReg, PtrVT); } void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, - getTargetLowering()->getPointerTy()); + auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, PtrVT); if (Parent) { switch (Parent->getOpcode()) { case ISD::LOAD: { @@ -155,7 +155,7 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { } } } - AliasReg = CurDAG->getRegister(Mips::SP, getTargetLowering()->getPointerTy()); + AliasReg = CurDAG->getRegister(Mips::SP, PtrVT); return; } diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 846e3c964f44..3522cbb1f36a 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -502,7 +502,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops, unsigned V0Reg = Mips::V0; if (NeedMips16Helper) { RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, + getPointerTy(DAG.getDataLayout())); ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget); JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG, MipsII::MO_GOT, Chain, diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index c2651b82d285..e2f6fcc17726 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -267,7 +267,7 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, 
MVT RetVT, } unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i32 && + assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 && "Alloca should always return a pointer."); DenseMap<const AllocaInst *, int>::iterator SI = @@ -382,7 +382,7 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) { // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -507,12 +507,13 @@ bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { break; case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } @@ -532,7 +533,7 @@ bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { } bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -931,8 +932,8 @@ bool MipsFastISel::selectFPExt(const Instruction *I) { if (UnsupportedFPMode) return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; @@ -998,8 +999,8 @@ bool MipsFastISel::selectFPTrunc(const Instruction *I) { if (UnsupportedFPMode) return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; @@ -1415,7 +1416,8 @@ bool MipsFastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + // Analyze operands of the call, assigning locations to each operand. 
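// --- Illustrative aside, not part of the original patch -------------------
// A recurring pattern in the MipsFastISel hunks above: the type-query
// helpers now take the module's DataLayout explicitly instead of reading
// target-cached state. Using the fast-isel's existing DL and TLI members
// (both already present in this file), the new call shape is roughly:
//
//   EVT RVEVT = TLI.getValueType(DL, RV->getType());  // was getValueType(Ty)
//   MVT PtrVT = TLI.getPointerTy(DL);                 // was getPointerTy()
//
// Only the extra DataLayout argument changes; the returned types should be
// the same as before.
// ---------------------------------------------------------------------------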
SmallVector<CCValAssign, 16> ValLocs; MipsCCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, @@ -1449,7 +1451,7 @@ bool MipsFastISel::selectRet(const Instruction *I) { if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; @@ -1493,8 +1495,8 @@ bool MipsFastISel::selectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(Op->getType(), true); - DestVT = TLI.getValueType(I->getType(), true); + SrcVT = TLI.getValueType(DL, Op->getType(), true); + DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -1521,8 +1523,8 @@ bool MipsFastISel::selectIntExt(const Instruction *I) { return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -1620,7 +1622,7 @@ unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, } bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); + EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -1685,7 +1687,7 @@ bool MipsFastISel::selectShift(const Instruction *I) { if (!TempReg) return false; - MVT Op0MVT = TLI.getValueType(Op0->getType(), true).getSimpleVT(); + MVT Op0MVT = TLI.getValueType(DL, Op0->getType(), true).getSimpleVT(); bool IsZExt = Opcode == Instruction::LShr; if (!emitIntExt(Op0MVT, Op0Reg, MVT::i32, TempReg, IsZExt)) return false; @@ -1803,7 +1805,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V, unsigned VReg = getRegForValue(V); if (VReg == 0) return 0; - MVT VMVT = TLI.getValueType(V->getType(), true).getSimpleVT(); + MVT VMVT = TLI.getValueType(DL, V->getType(), true).getSimpleVT(); if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned)) diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 2c9868ac051d..06502397b6b8 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -59,8 +59,9 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { /// GOT address into a register. 
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg(); - return CurDAG->getRegister(GlobalBaseReg, - getTargetLowering()->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy( + CurDAG->getDataLayout())) + .getNode(); } /// ComplexPattern used on MipsInstrInfo diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 67ddcc4dacb9..fbebb9abb4cc 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -466,7 +466,8 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return Mips::createFastISel(funcInfo, libInfo); } -EVT MipsTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT MipsTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -1579,9 +1580,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); SDLoc DL(Op); - EVT PTy = getPointerTy(); + auto &TD = DAG.getDataLayout(); + EVT PTy = getPointerTy(TD); unsigned EntrySize = - DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout()); + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); Index = DAG.getNode(ISD::MUL, DL, PTy, Index, DAG.getConstant(EntrySize, DL, PTy)); @@ -1647,10 +1649,10 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getOperand(0).getValueType(); - SDValue Cond = DAG.getNode(ISD::SETCC, DL, - getSetCCResultType(*DAG.getContext(), Ty), - Op.getOperand(0), Op.getOperand(1), - Op.getOperand(4)); + SDValue Cond = + DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), Ty), + Op.getOperand(0), Op.getOperand(1), Op.getOperand(4)); return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2), Op.getOperand(3)); @@ -1723,7 +1725,7 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = getTargetMachine().getTLSModel(GV); @@ -1831,7 +1833,7 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + getPointerTy(MF.getDataLayout())); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. @@ -1850,9 +1852,9 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Node); unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4; - SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr, - MachinePointerInfo(SV), false, false, false, - 0); + SDValue VAListLoad = + DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, VAListPtr, + MachinePointerInfo(SV), false, false, false, 0); SDValue VAList = VAListLoad; // Re-align the pointer if necessary. @@ -1874,7 +1876,9 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { } // Increment the pointer, VAList, to the next vaarg. 
- unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); + auto &TD = DAG.getDataLayout(); + unsigned ArgSizeInBytes = + TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes, ArgSlotSizeInBytes), @@ -2062,7 +2066,7 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain, DAG.getRegister(OffsetReg, Ty), - DAG.getRegister(AddrReg, getPointerTy()), + DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), Chain.getValue(1)); } @@ -2479,15 +2483,16 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, SDLoc DL, bool IsTailCall, SelectionDAG &DAG) const { if (!IsTailCall) { - SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, - DAG.getIntPtrConstant(Offset, DL)); + SDValue PtrOff = + DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, + DAG.getIntPtrConstant(Offset, DL)); return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, false, 0); } MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), /*isVolatile=*/ true, false, 0); } @@ -2611,8 +2616,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL); - SDValue StackPtr = DAG.getCopyFromReg( - Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, + getPointerTy(DAG.getDataLayout())); // With EABI is it possible to have 16 args on registers. 
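// --- Illustrative aside, not part of the original patch -------------------
// In the SelectionDAG lowering code the same migration applies, but the
// DataLayout is reached through the DAG rather than through a member, e.g.
// (pattern taken from the surrounding hunks):
//
//   EVT PtrVT = getPointerTy(DAG.getDataLayout());
//   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
//
// Several hunks in this patch hoist PtrVT (or the DataLayout reference)
// into a local so the lookup is not repeated at every use.
// ---------------------------------------------------------------------------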
std::deque< std::pair<unsigned, SDValue> > RegsToPass; @@ -2750,7 +2756,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, IsCallReloc = true; } } else - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, + getPointerTy(DAG.getDataLayout()), 0, MipsII::MO_NO_FLAG); GlobalOrExternal = true; } @@ -2758,8 +2765,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const char *Sym = S->getSymbol(); if (!ABI.IsN64() && !IsPIC) // !N64 && static - Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG); + Callee = DAG.getTargetExternalSymbol( + Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG); else if (LargeGOT) { Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, @@ -3029,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -3174,12 +3181,13 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + SDValue Val = + DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(V0, getPointerTy())); + RetOps.push_back(DAG.getRegister(V0, getPointerTy(DAG.getDataLayout()))); } RetOps[0] = Chain; // Update chain. @@ -3198,9 +3206,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. -MipsTargetLowering::ConstraintType MipsTargetLowering:: -getConstraintType(const std::string &Constraint) const -{ +MipsTargetLowering::ConstraintType +MipsTargetLowering::getConstraintType(StringRef Constraint) const { // Mips specific constraints // GCC config/mips/constraints.md // @@ -3290,9 +3297,8 @@ MipsTargetLowering::getSingleConstraintMatchWeight( /// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag /// that is returned indicates whether parsing was successful. The second flag /// is true if the numeric part exists. -static std::pair<bool, bool> -parsePhysicalReg(StringRef C, std::string &Prefix, - unsigned long long &Reg) { +static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix, + unsigned long long &Reg) { if (C.front() != '{' || C.back() != '}') return std::make_pair(false, false); @@ -3300,7 +3306,7 @@ parsePhysicalReg(StringRef C, std::string &Prefix, StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; I = std::find_if(B, E, std::ptr_fun(isdigit)); - Prefix.assign(B, I - B); + Prefix = StringRef(B, I - B); // The second flag is set to false if no numeric characters were found. 
if (I == E) @@ -3316,7 +3322,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const TargetRegisterClass *RC; - std::string Prefix; + StringRef Prefix; unsigned long long Reg; std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg); @@ -3332,7 +3338,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { RC = TRI->getRegClass(Prefix == "hi" ? Mips::HI32RegClassID : Mips::LO32RegClassID); return std::make_pair(*(RC->begin()), RC); - } else if (Prefix.compare(0, 4, "$msa") == 0) { + } else if (Prefix.startswith("$msa")) { // Parse $msa(ir|csr|access|save|modify|request|map|unmap) // No numeric characters follow the name. @@ -3390,7 +3396,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { /// pointer. std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -3546,8 +3552,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) @@ -3625,7 +3631,7 @@ void MipsTargetLowering::copyByValRegs( FrameObjOffset = VA.getLocMemOffset(); // Create frame object. - EVT PtrTy = getPointerTy(); + EVT PtrTy = getPointerTy(DAG.getDataLayout()); int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); InVals.push_back(FIN); @@ -3662,7 +3668,8 @@ void MipsTargetLowering::passByValArg( unsigned OffsetInBytes = 0; // From beginning of struct unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes); - EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); + EVT PtrTy = getPointerTy(DAG.getDataLayout()), + RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); unsigned NumRegs = LastReg - FirstReg; if (NumRegs) { @@ -3787,7 +3794,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains, unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo(), false, false, 0); cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue( @@ -3920,8 +3927,8 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned MipsTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { // Named registers is expected to be fairly rare. For now, just support $28 // since the linux kernel uses it. 
if (Subtarget.isGP64bit()) { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index bc9a1ce64097..6fe8f830d35d 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -227,7 +227,9 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -247,7 +249,8 @@ namespace llvm { const char *getTargetNodeName(unsigned Opcode) const override; /// getSetCCResultType - get the ISD::SETCC result ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; @@ -263,7 +266,8 @@ namespace llvm { void HandleByVal(CCState *, unsigned &, unsigned) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; @@ -478,8 +482,7 @@ namespace llvm { bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; // Inline asm support - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -493,8 +496,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -505,8 +507,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "R") return InlineAsm::Constraint_R; else if (ConstraintCode == "ZC") @@ -514,8 +516,8 @@ namespace llvm { return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index ec7bf314c641..096b3bee5d07 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -621,10 +621,17 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { !MFI->hasVarSizedObjects(); } -void MipsSEFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); +/// Mark \p Reg and all registers aliasing it in the bitset. 
+void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SavedRegs.set(*AI); +} + +void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); @@ -632,10 +639,10 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) - MRI.setPhysRegUsed(FP); + setAliasRegs(MF, SavedRegs, FP); // Mark $s7 as used if function has dedicated base pointer. if (hasBP(MF)) - MRI.setPhysRegUsed(BP); + setAliasRegs(MF, SavedRegs, BP); // Create spill slots for eh data registers if function calls eh_return. if (MipsFI->callsEhReturn()) diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 2fcd6bbb9a15..9cb32e6c7829 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -34,8 +34,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; unsigned ehDataReg(unsigned I) const; }; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 990a2f8d8c85..cb46d731da29 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -841,7 +841,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case MipsISD::ThreadPointer: { - EVT PtrVT = getTargetLowering()->getPointerTy(); + EVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); unsigned RdhwrOpc, DestReg; if (PtrVT == MVT::i32) { diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ae2837a8582c..b319fd07884b 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -838,8 +838,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (!VT.isVector()) - return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), - VT, TL->getScalarShiftAmountTy(VT), DAG); + return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT, + TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), + DAG); return SDValue(N, 0); } diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp deleted file mode 100644 index edd8f670707f..000000000000 --- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MipsSelectionDAGInfo class. 
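// --- Illustrative aside, not part of the original patch -------------------
// MipsSelectionDAGInfo added no Mips-specific behaviour beyond forwarding a
// DataLayout that the base class no longer needs, so the .cpp/.h pair is
// deleted here (and dropped from CMakeLists.txt earlier in this diff). The
// subtarget now holds the base class directly, roughly:
//
//   const TargetSelectionDAGInfo TSInfo;  // was: const MipsSelectionDAGInfo
//
// as shown in the MipsSubtarget.h hunk further below.
// ---------------------------------------------------------------------------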
-// -//===----------------------------------------------------------------------===// - -#include "MipsTargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "mips-selectiondag-info" - -MipsSelectionDAGInfo::MipsSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { -} diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h deleted file mode 100644 index 061423fbeb86..000000000000 --- a/lib/Target/Mips/MipsSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Mips subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class MipsTargetMachine; - -class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit MipsSelectionDAGInfo(const DataLayout &DL); - ~MipsSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index c41bb16a58ea..471b6e19a8bb 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), - HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(*TM.getDataLayout()), + HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 5f9296812e1c..1db8881404c9 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -18,10 +18,10 @@ #include "MipsFrameLowering.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" -#include "MipsSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -140,7 +140,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { Triple TargetTriple; - const MipsSelectionDAGInfo TSInfo; + const TargetSelectionDAGInfo TSInfo; std::unique_ptr<const MipsInstrInfo> InstrInfo; std::unique_ptr<const MipsFrameLowering> FrameLowering; std::unique_ptr<const MipsTargetLowering> TLInfo; @@ -275,7 +275,7 @@ public: void setHelperClassesMips16(); void setHelperClassesMipsSE(); - const MipsSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index c820668befa0..1c77745d130b 100644 --- 
a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -62,7 +62,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, if (!ABI.IsN64()) Ret += "-p:32:32"; - // 8 and 16 bit integers only need no have natural alignment, but try to + // 8 and 16 bit integers only need to have natural alignment, but try to // align them to 32 bits. 64 bit integers have natural alignment. Ret += "-i8:8:32-i16:16:32-i64:64"; @@ -237,7 +237,7 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { if (Subtarget->allowMixed16_32()) { DEBUG(errs() << "No Target Transform Info Pass Added\n"); // FIXME: This is no longer necessary as the TTI returned is per-function. - return TargetTransformInfo(getDataLayout()); + return TargetTransformInfo(F.getParent()->getDataLayout()); } DEBUG(errs() << "Target Transform Info Pass Added\n"); diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 221d2f093aeb..ad7302037cad 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -37,7 +37,7 @@ static MCInstrInfo *createNVPTXMCInstrInfo() { return X; } -static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); // PTX does not have a return address register. InitNVPTXMCRegisterInfo(X, 0); @@ -46,13 +46,13 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createNVPTXMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo( - StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); // The default relocation model is used regardless of what the client has diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index cadd7a46cd9d..ecb0f0a1d0a1 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -340,7 +340,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { } void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Type *Ty = F->getReturnType(); @@ -366,20 +366,20 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { O << ".param .b" << size << " func_retval0"; } else if (isa<PointerType>(Ty)) { - O << ".param .b" << TLI->getPointerTy().getSizeInBits() + O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() << " func_retval0"; } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { - unsigned totalsz = TD->getTypeAllocSize(Ty); + unsigned totalsz = DL.getTypeAllocSize(Ty); unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) - retAlignment = TD->getABITypeAlignment(Ty); + retAlignment = DL.getABITypeAlignment(Ty); O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz << "]"; } else llvm_unreachable("Unknown return type"); } else { 
SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, Ty, vtparts); + ComputeValueVTs(*TLI, DL, Ty, vtparts); unsigned idx = 0; for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; @@ -1433,7 +1433,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget->getSmVersion() >= 20); - MVT thePointerTy = TLI->getPointerTy(); + MVT thePointerTy = TLI->getPointerTy(*TD); O << "(\n"; @@ -1579,7 +1579,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Further, if a part is vector, print the above for // each vector element. SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, ETy, vtparts); + ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts); for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 09e0bd5d3d88..b75cf4040312 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -80,14 +80,14 @@ static bool IsPTXVectorType(MVT VT) { /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the /// same number of types as the Ins/Outs arrays in LowerFormalArguments, /// LowerCall, and LowerReturn. -static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, +static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets = nullptr, uint64_t StartingOffset = 0) { SmallVector<EVT, 16> TempVTs; SmallVector<uint64_t, 16> TempOffsets; - ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); + ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { EVT VT = TempVTs[i]; uint64_t Off = TempOffsets[i]; @@ -885,15 +885,16 @@ SDValue NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); } -std::string -NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment, - const ImmutableCallSite *CS) const { +std::string NVPTXTargetLowering::getPrototype( + const DataLayout &DL, Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, + const ImmutableCallSite *CS) const { + auto PtrVT = getPointerTy(DL); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -921,13 +922,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { - O << ".param .b" << getPointerTy().getSizeInBits() << " _"; + O << ".param .b" << PtrVT.getSizeInBits() << " _"; } else if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { - O << ".param .align " - << retAlignment - << " .b8 _[" - << getDataLayout()->getTypeAllocSize(retTy) << "]"; + auto &DL = 
CS->getCalledFunction()->getParent()->getDataLayout(); + O << ".param .align " << retAlignment << " .b8 _[" + << DL.getTypeAllocSize(retTy) << "]"; } else { llvm_unreachable("Unknown return type"); } @@ -936,7 +936,6 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << "_ ("; bool first = true; - MVT thePointerTy = getPointerTy(); unsigned OIdx = 0; for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { @@ -950,24 +949,23 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, if (Ty->isAggregateType() || Ty->isVectorTy()) { unsigned align = 0; const CallInst *CallI = cast<CallInst>(CS->getInstruction()); - const DataLayout *TD = getDataLayout(); // +1 because index 0 is reserved for return type alignment if (!llvm::getAlign(*CallI, i + 1, align)) - align = TD->getABITypeAlignment(Ty); - unsigned sz = TD->getTypeAllocSize(Ty); + align = DL.getABITypeAlignment(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; // update the index for Outs SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*this, Ty, vtparts); + ComputeValueVTs(*this, DL, Ty, vtparts); if (unsigned len = vtparts.size()) OIdx += len - 1; continue; } // i8 types in IR will be i16 types in SDAG - assert((getValueType(Ty) == Outs[OIdx].VT || - (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && + assert((getValueType(DL, Ty) == Outs[OIdx].VT || + (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments"); // scalar type unsigned sz = 0; @@ -976,7 +974,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, if (sz < 32) sz = 32; } else if (isa<PointerType>(Ty)) - sz = thePointerTy.getSizeInBits(); + sz = PtrVT.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); O << ".param .b" << sz << " "; @@ -988,7 +986,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, Type *ETy = PTy->getElementType(); unsigned align = Outs[OIdx].Flags.getByValAlign(); - unsigned sz = getDataLayout()->getTypeAllocSize(ETy); + unsigned sz = DL.getTypeAllocSize(ETy); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; @@ -1002,7 +1000,6 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, Type *Ty, unsigned Idx) const { - const DataLayout *TD = getDataLayout(); unsigned Align = 0; const Value *DirectCallee = CS->getCalledFunction(); @@ -1043,7 +1040,8 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // Call is indirect or alignment information is not available, fall back to // the ABI type alignment - return TD->getABITypeAlignment(Ty); + auto &DL = CS->getCaller()->getParent()->getDataLayout(); + return DL.getABITypeAlignment(Ty); } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -1064,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; - const DataLayout *TD = getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); + auto &DL = MF.getDataLayout(); SDValue tempChain = Chain; Chain = DAG.getCALLSEQ_START(Chain, @@ -1096,11 +1094,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // aggregate SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); + 
ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets, + 0); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, MVT::i32), @@ -1137,10 +1136,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, MVT::i32), @@ -1321,7 +1320,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<uint64_t, 16> Offsets; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); - ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), + vtparts, &Offsets, 0); // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = Outs[OIdx].Flags.getByValSize(); @@ -1342,9 +1342,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT elemtype = vtparts[j]; int curOffset = Offsets[j]; unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); - SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], - DAG.getConstant(curOffset, dl, getPointerTy())); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], + DAG.getConstant(curOffset, dl, PtrVT)); SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), false, false, false, PartAlign); @@ -1371,12 +1371,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle Result if (Ins.size() > 0) { SmallVector<EVT, 16> resvtparts; - ComputeValueVTs(*this, retTy, resvtparts); + ComputeValueVTs(*this, DL, retTy, resvtparts); // Declare // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 - unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); + unsigned resultsz = DL.getTypeAllocSizeInBits(retTy); // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for // these three types to match the logic in // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. @@ -1415,7 +1415,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for a // CallPrototype SDNode which will print out to the value of the string. 
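// --- Illustrative aside, not part of the original patch -------------------
// NVPTXTargetLowering::getPrototype() now receives the DataLayout
// explicitly, so its call site becomes (as in the hunk that follows):
//
//   std::string Proto =
//       getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
//
// Inside getPrototype the former getDataLayout()/thePointerTy locals are
// replaced by the passed-in DL and getPointerTy(DL).
// ---------------------------------------------------------------------------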
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); + std::string Proto = + getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS); const char *ProtoStr = nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); SDValue ProtoOps[] = { @@ -1477,7 +1478,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Generate loads from param memory/moves from registers for result if (Ins.size() > 0) { if (retTy && retTy->isVectorTy()) { - EVT ObjectVT = getValueType(retTy); + EVT ObjectVT = getValueType(DL, retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), @@ -1590,13 +1591,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); InVals.push_back(Elt); } - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); } } } else { SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); for (unsigned i = 0, e = Ins.size(); i != e; ++i) { @@ -1608,8 +1609,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<EVT, 4> LoadRetVTs; EVT TheLoadType = VTs[i]; - if (retTy->isIntegerTy() && - TD->getTypeAllocSizeInBits(retTy) < 32) { + if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) { // This is for integer types only, and specifically not for // aggregates. LoadRetVTs.push_back(MVT::i32); @@ -1920,11 +1920,11 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { } MemSDNode *MemSD = cast<MemSDNode>(N); - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); unsigned Align = MemSD->getAlignment(); unsigned PrefAlign = - TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); + TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); if (Align < PrefAlign) { // This store is not sufficiently aligned, so bail out and let this vector // store be scalarized. 
Note that we may still be able to emit smaller @@ -2064,7 +2064,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - const DataLayout *TD = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); @@ -2118,7 +2119,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ty->isAggregateType()) { SmallVector<EVT, 16> vtparts; - ComputePTXValueVTs(*this, Ty, vtparts); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); assert(vtparts.size() > 0 && "empty aggregate type not expected"); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { @@ -2130,7 +2131,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); for (unsigned parti = 0; parti < NumRegs; ++parti) { InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); @@ -2156,13 +2157,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // NOTE: Here, we lose the ability to issue vector loads for vectors // that are a part of a struct. This should be investigated in the // future. - ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets, + 0); assert(vtparts.size() > 0 && "empty aggregate type not expected"); bool aggregateIsPacked = false; if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) aggregateIsPacked = STy->isPacked(); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { EVT partVT = vtparts[parti]; @@ -2170,12 +2172,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( PointerType::get(partVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(offsets[parti], dl, getPointerTy())); - unsigned partAlign = - aggregateIsPacked ? 1 - : TD->getABITypeAlignment( - partVT.getTypeForEVT(F->getContext())); + DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(offsets[parti], dl, PtrVT)); + unsigned partAlign = aggregateIsPacked + ? 1 + : DL.getABITypeAlignment( + partVT.getTypeForEVT(F->getContext())); SDValue p; if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
@@ -2198,8 +2200,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + EVT ObjectVT = getValueType(DL, Ty); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); unsigned NumElts = ObjectVT.getVectorNumElements(); assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); @@ -2212,9 +2214,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue P = DAG.getLoad( - EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, - false, true, - TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, + true, + DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2229,9 +2231,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue(PointerType::get( VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue P = DAG.getLoad( - VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, - false, true, - TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, + true, + DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2269,13 +2271,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue( PointerType::get(VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, dl, getPointerTy())); + SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(Ofst, dl, PtrVT)); SDValue P = DAG.getLoad( VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, false, true, - TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2288,7 +2289,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); InVals.push_back(Elt); } - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); } InsIdx += NumElts; } @@ -2298,23 +2299,24 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } // A plain scalar. - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); // If ABI, load from the param symbol - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); Value *srcValue = Constant::getNullValue(PointerType::get( ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue p; if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; - p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, - MachinePointerInfo(srcValue), ObjectVT, false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + p = DAG.getExtLoad( + ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), + ObjectVT, false, false, false, + DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } else { - p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + p = DAG.getLoad( + Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false, + false, false, + DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } if (p.getNode()) p.getNode()->setIROrder(idx + 1); @@ -2329,10 +2331,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // machine instruction fails because TargetExternalSymbol // (not lowered) is target dependent, and CopyToReg assumes // the source is lowered. - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); assert(ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type"); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) p.getNode()->setIROrder(idx + 1); @@ -2370,7 +2372,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); Type *RetTy = F->getReturnType(); - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -2384,7 +2386,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(NumElts == Outs.size() && "Bad scalarization of return value"); // const_cast can be removed in later LLVM versions - EVT EltVT = getValueType(RetTy).getVectorElementType(); + EVT EltVT = getValueType(TD, RetTy).getVectorElementType(); bool NeedExtend = false; if (EltVT.getSizeInBits() < 16) NeedExtend = true; @@ -2435,7 +2437,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); unsigned PerStoreOffset = - TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); for (unsigned i = 0; i < NumElts; i += VecSize) { // Get values @@ -2493,7 +2495,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, } else { SmallVector<EVT, 16> ValVTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0); assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); for (unsigned i = 0, e = Outs.size(); i != e; ++i) { @@ -2509,8 +2511,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, TheValType.getVectorElementType(), TmpVal, DAG.getIntPtrConstant(j, dl)); EVT TheStoreType = ValVTs[i]; - if (RetTy->isIntegerTy() && - TD->getTypeAllocSizeInBits(RetTy) < 32) { + if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) { // The following zero-extension is for integer types only, and // specifically not for aggregates. 
TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); @@ -3291,14 +3292,14 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_p: { - + auto &DL = I.getModule()->getDataLayout(); Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldu_global_i) - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getPointerTy(DL); else - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; @@ -3311,14 +3312,15 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: { + auto &DL = I.getModule()->getDataLayout(); Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldg_global_i) - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getPointerTy(DL); else - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; @@ -3731,8 +3733,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // AddrMode - This represents an addressing mode of: @@ -3772,7 +3774,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. NVPTXTargetLowering::ConstraintType -NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { +NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -3794,7 +3796,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -4251,7 +4253,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - const DataLayout *TD, SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); SDLoc DL(N); @@ -4282,8 +4283,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, LoadSDNode *LD = cast<LoadSDNode>(N); unsigned Align = LD->getAlignment(); + auto &TD = DAG.getDataLayout(); unsigned PrefAlign = - TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); + TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); if (Align < PrefAlign) { // This load is not sufficiently aligned, so bail out and let this vector // load be scalarized. 
Note that we may still be able to emit smaller @@ -4495,7 +4497,7 @@ void NVPTXTargetLowering::ReplaceNodeResults( default: report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: - ReplaceLoadVector(N, DAG, getDataLayout(), Results); + ReplaceLoadVector(N, DAG, Results); return; case ISD::INTRINSIC_W_CHAIN: ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index ed94775b3002..e5c37321a33b 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -456,24 +456,23 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override { + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const override { if (VT.isVector()) return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); return MVT::i1; } - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -483,7 +482,7 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; - std::string getPrototype(Type *, const ArgListTy &, + std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl<ISD::OutputArg> &, unsigned retAlignment, const ImmutableCallSite *CS) const; @@ -501,7 +500,9 @@ public: const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 6ab0fadf9a35..0bf72febc4a0 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -57,7 +57,6 @@ char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop static void convertTransferToLoop( Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, - //unsigned numLoads, bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); @@ -84,6 +83,8 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind + // TODO: we can leverage the align parameter of llvm.memcpy for more efficient + // word-sized loads and stores. 
Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), srcVolatile); // store at dstAddr+ind @@ -137,13 +138,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // // Collect all the aggrLoads, aggrMemcpys and addrMemsets. // - //const BasicBlock *firstBB = &F.front(); // first BB in F for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (!load->hasOneUse()) continue; @@ -152,7 +150,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { User *use = load->user_back(); if (StoreInst *store = dyn_cast<StoreInst>(use)) { - if (store->getOperand(0) != load) //getValueOperand + if (store->getOperand(0) != load) continue; aggrLoads.push_back(load); } @@ -188,8 +186,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // // Do the transformation of an aggr load/copy/set to a loop // - for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) { - LoadInst *load = aggrLoads[i]; + for (LoadInst *load : aggrLoads) { StoreInst *store = dyn_cast<StoreInst>(*load->user_begin()); Value *srcAddr = load->getOperand(0); Value *dstAddr = store->getOperand(1); @@ -203,20 +200,19 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { load->eraseFromParent(); } - for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) { - MemTransferInst *cpy = aggrMemcpys[i]; - Value *len = cpy->getLength(); - // llvm 2.7 version of memcpy does not have volatile - // operand yet. So always making it non-volatile - // optimistically, so that we don't see unnecessary - // st.volatile in ptx - convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false, - false, Context, F); + for (MemTransferInst *cpy : aggrMemcpys) { + convertTransferToLoop(/* splitAt */ cpy, + /* srcAddr */ cpy->getSource(), + /* dstAddr */ cpy->getDest(), + /* len */ cpy->getLength(), + /* srcVolatile */ cpy->isVolatile(), + /* dstVolatile */ cpy->isVolatile(), + /* Context */ Context, + /* Function F */ F); cpy->eraseFromParent(); } - for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) { - MemSetInst *memsetinst = aggrMemsets[i]; + for (MemSetInst *memsetinst : aggrMemsets) { Value *len = memsetinst->getLength(); Value *val = memsetinst->getValue(); convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context, diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index e83f735a551e..5a83371b07f1 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -2,7 +2,7 @@ // // The LLVM Compiler Infrastructure // -// This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// @@ -115,7 +115,7 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { replaceImageHandle(Handle, MF); - return true; + return true; } return false; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 71645dca69c5..bd2509a3c8c9 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -48,7 +48,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM), InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), - TSInfo(TM.getDataLayout()), FrameLowering() {} + FrameLowering() {} bool NVPTXSubtarget::hasImageHandles() const { // Enable handles for Kepler+, where CUDA supports indirect surfaces and diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 9d9072efc382..248f9e117d83 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -148,8 +148,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(NVPTXTTIImpl(this, F)); + }); } void NVPTXPassConfig::addIRPasses() { diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index dc81802f4b5a..e7250cdba5ac 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -94,7 +94,7 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -117,3 +117,15 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost( Opd1PropInfo, Opd2PropInfo); } } + +void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + BaseT::getUnrollingPreferences(L, UP); + + // Enable partial unrolling and runtime unrolling, but reduce the + // threshold. This partially unrolls small loops which are often + // unrolled by the PTX to SASS compiler and unrolling earlier can be + // beneficial. + UP.Partial = UP.Runtime = true; + UP.PartialThreshold = UP.Threshold / 4; +} diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 4280888988f9..5bcd1e27a558 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -37,8 +37,9 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { const NVPTXTargetLowering *getTLI() const { return TLI; }; public: - explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
NVPTXTTIImpl(const NVPTXTTIImpl &Arg) @@ -46,18 +47,6 @@ public: NVPTXTTIImpl(NVPTXTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } bool hasBranchDivergence() { return true; } @@ -69,6 +58,8 @@ public: TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); }; } // end namespace llvm diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index fe168a547597..c0c83cc258b8 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -33,7 +33,6 @@ add_llvm_target(PowerPCCodeGen PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCTOCRegDeps.cpp - PPCSelectionDAGInfo.cpp PPCTLSDynamicCall.cpp PPCVSXCopy.cpp PPCVSXFMAMutate.cpp diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 5c38fe173d96..30f232a9a91e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -51,10 +51,9 @@ static MCInstrInfo *createPPCMCInstrInfo() { return X; } -static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { - Triple TheTriple(TT); - bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || - TheTriple.getArch() == Triple::ppc64le); +static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) { + bool isPPC64 = + (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); unsigned Flavour = isPPC64 ? 0 : 1; unsigned RA = isPPC64 ? 
PPC::LR8 : PPC::LR; @@ -65,9 +64,7 @@ static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitPPCMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createPPCMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, @@ -90,22 +87,20 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { - Triple T(TT); - if (T.isOSDarwin()) + if (TT.isOSDarwin()) RM = Reloc::DynamicNoPIC; else RM = Reloc::Static; } if (CM == CodeModel::Default) { - Triple T(TT); - if (!T.isOSDarwin() && - (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le)) + if (!TT.isOSDarwin() && + (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) CM = CodeModel::Medium; } X->initMCCodeGenInfo(RM, CM, OL); @@ -231,7 +226,7 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, static MCTargetStreamer * createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new PPCTargetELFStreamer(S); return new PPCTargetMachOStreamer(S); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 87a5236e711f..199a0debf88b 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -197,7 +197,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // External or weakly linked global variables need non-lazily-resolved stubs if (TM.getRelocationModel() != Reloc::Static && - (GV->isDeclaration() || GV->isWeakForLinker())) { + !GV->isStrongDefinitionForLinker()) { if (!GV->hasHiddenVisibility()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -369,28 +369,70 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); - EncodedBytes = 6*4; + EncodedBytes = 0; // Materialize the jump address: EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(32).addImm(16)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF)); + // Save the current TOC pointer before the remote call. + int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD) + .addReg(PPC::X2) + .addImm(TOCSaveOffset) + .addReg(PPC::X1)); + ++EncodedBytes; + + + // If we're on ELFv1, then we need to load the actual function pointer from + // the function descriptor. 
+ if (!Subtarget->isELFv2ABI()) { + // Load the new TOC pointer and the function address, but not r11 + // (needing this is rare, and loading it here would prevent passing it + // via a 'nest' parameter. + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addImm(8) + .addReg(ScratchReg)); + ++EncodedBytes; + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(ScratchReg) + .addImm(0) + .addReg(ScratchReg)); + ++EncodedBytes; + } + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + ++EncodedBytes; + + // Restore the TOC pointer after the call. + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addImm(TOCSaveOffset) + .addReg(PPC::X1)); + ++EncodedBytes; } + // Each instruction is 4 bytes. + EncodedBytes *= 4; + // Emit padding. unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); assert(NumBytes >= EncodedBytes && @@ -624,7 +666,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { IsExternal = GV->isDeclaration(); IsCommon = GV->hasCommonLinkage(); IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker()); + !GV->isStrongDefinitionForLinker(); IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); @@ -706,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = getSymbol(GV); IsExternal = GV->isDeclaration(); IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker()); + !GV->isStrongDefinitionForLinker(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 416131745806..baadf081a64c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -351,8 +351,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { Opcode = ISD::FTRUNC; break; } - MVT VTy = - TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); + auto &DL = CI->getModule()->getDataLayout(); + MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), + true); if (VTy == MVT::Other) return true; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 874a6fce0656..5bc9124f8085 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -133,6 +133,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, + // The 'nest' parameter, if any, is passed in R11. + CCIfNest<CCAssignToReg<[R11]>>, + // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index fafcd76f9d18..5f236f744fc4 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -262,7 +262,7 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { // fast-isel, and return its equivalent machine type in VT. // FIXME: Copied directly from ARM -- factor into base class? 
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT Evt = TLI.getValueType(Ty, true); + EVT Evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (Evt == MVT::Other || !Evt.isSimple()) return false; @@ -324,12 +324,13 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { return PPCComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -799,7 +800,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, bool IsZExt, unsigned DestReg) { Type *Ty = SrcValue1->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -893,8 +894,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, // Attempt to fast-select a floating-point extend instruction. bool PPCFastISel::SelectFPExt(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; @@ -911,8 +912,8 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) { // Attempt to fast-select a floating-point truncate instruction. bool PPCFastISel::SelectFPTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; @@ -992,7 +993,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; @@ -1157,7 +1158,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1594,7 +1595,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; @@ -1641,7 +1642,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); @@ -1769,8 +1770,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { // Attempt to fast-select an integer truncate instruction. bool PPCFastISel::SelectTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) return false; @@ -1806,8 +1807,8 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -1979,7 +1980,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // on the "if" path here. if (CModel == CodeModel::Large || (GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker())) || + !GV->isStrongDefinitionForLinker()) || GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), @@ -2127,7 +2128,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. 
if (!CEVT.isSimple()) return 0; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index b4008e4a886a..87229d80d9c1 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UsedRegMask = 0; for (unsigned i = 0; i != 32; ++i) - if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + if (MRI.isPhysRegModified(VRRegNo[i])) UsedRegMask |= 1 << (31-i); // Live in and live out values already must be in the mask, so don't bother @@ -1158,9 +1159,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -void -PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *) const { +void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const PPCRegisterInfo *RegInfo = static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); @@ -1168,8 +1171,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.setPhysRegUnused(LR); + SavedRegs.reset(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -1214,9 +1216,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. if (!isPPC64 && !isDarwinABI && - (MRI.isPhysRegUsed(PPC::CR2) || - MRI.isPhysRegUsed(PPC::CR3) || - MRI.isPhysRegUsed(PPC::CR4))) { + (SavedRegs.test(PPC::CR2) || + SavedRegs.test(PPC::CR3) || + SavedRegs.test(PPC::CR4))) { int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 28d074ecd79d..d6a389bfbf0d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -45,8 +45,8 @@ public: bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index c85c2610d2f5..01a3acb742e6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -102,7 +102,8 @@ namespace { /// getSmallIPtrImm - Return a target constant of pointer type. 
inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) { - return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy()); + return CurDAG->getTargetConstant( + Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } /// isRotateAndMask - Returns true if Mask and Shift can be folded into a @@ -313,7 +314,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { const Module *M = MF->getFunction()->getParent(); DebugLoc dl; - if (PPCLowering->getPointerTy() == MVT::i32) { + if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (PPCSubTarget->isTargetELF()) { GlobalBaseReg = PPC::R30; if (M->getPICLevel() == PICLevel::Small) { @@ -342,7 +343,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } } return CurDAG->getRegister(GlobalBaseReg, - PPCLowering->getPointerTy()).getNode(); + PPCLowering->getPointerTy(CurDAG->getDataLayout())) + .getNode(); } /// isIntS16Immediate - This method tests to see if the node is either a 32-bit @@ -2205,7 +2207,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); - EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); if (!PPCSubTarget->useCRBits() && @@ -2468,10 +2471,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops)); + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -2506,10 +2510,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; - return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops)); + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } } @@ -2662,7 +2667,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); - EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. @@ -2901,7 +2907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); if ((GValue->getType()->getElementType()->isFunctionTy() && - (GValue->isDeclaration() || GValue->isWeakForLinker())) || + !GValue->isStrongDefinitionForLinker()) || GValue->isDeclaration() || GValue->hasCommonLinkage() || GValue->hasAvailableExternallyLinkage()) return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, @@ -2915,7 +2921,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Generate a PIC-safe GOT reference. 
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); - return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32); + return CurDAG->SelectNodeTo( + N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), + MVT::i32); } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether @@ -3398,9 +3406,8 @@ void PPCDAGToDAGISel::PeepholeCROps() { bool IsModified; do { IsModified = false; - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + for (SDNode &Node : CurDAG->allnodes()) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); if (!MachineNode || MachineNode->use_empty()) continue; SDNode *ResNode = MachineNode; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 594472bbb47b..0ed9b051ffed 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -952,7 +952,8 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. -unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { +unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { // Darwin passes everything on 4 byte boundary. if (Subtarget.isDarwin()) return 4; @@ -1055,7 +1056,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, + EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; @@ -1101,7 +1103,7 @@ static bool isConstantOrUndef(int Op, int Val) { /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1132,7 +1134,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1174,7 +1176,7 @@ bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, if (!Subtarget.hasP8Vector()) return false; - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1237,7 +1239,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped @@ -1262,7 +1264,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped @@ -1352,7 +1354,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); @@ -1399,7 +1401,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, if (ShiftAmt < i) return -1; ShiftAmt -= i; - bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. @@ -1456,7 +1458,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); - if (DAG.getTarget().getDataLayout()->isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; @@ -1796,7 +1798,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } } - Disp = DAG.getTargetConstant(0, dl, getPointerTy()); + Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); @@ -2084,7 +2086,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); @@ -2270,7 +2272,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); @@ -2399,11 +2401,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = 
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - Type *IntPtrTy = - DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( - *DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2440,7 +2440,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2476,8 +2476,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); @@ -2797,7 +2796,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3023,7 +3022,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3059,12 +3058,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; - for (unsigned i = 0, e = Ins.size(); i != e; ++i) + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (Ins[i].Flags.isNest()) + continue; + if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; + } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, @@ -3216,6 +3219,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. 
+ unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); + + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -3425,7 +3439,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && @@ -3845,7 +3859,8 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { return nullptr; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op), - DAG.getTargetLoweringInfo().getPointerTy()).getNode(); + DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout())).getNode(); } namespace { @@ -3991,7 +4006,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, SDLoc dl) { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; @@ -4053,7 +4068,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) { static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, - bool isTailCall, bool IsPatchPoint, + bool isTailCall, bool IsPatchPoint, bool hasNest, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { @@ -4062,7 +4077,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. @@ -4084,8 +4099,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if ((DAG.getTarget().getRelocationModel() != Reloc::Static && (Subtarget.getTargetTriple().isMacOSX() && Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) || + !G->getGlobal()->isStrongDefinitionForLinker()) || (Subtarget.isTargetELF() && !isPPC64 && !G->getGlobal()->hasLocalLinkage() && DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { @@ -4196,11 +4210,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Chain = TOCVal.getValue(0); InFlag = TOCVal.getValue(1); - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. 
+ if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, + InFlag); - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); + Chain = EnvVal.getValue(0); + InFlag = EnvVal.getValue(1); + } MTCTROps[0] = Chain; MTCTROps[1] = LoadFuncPtr; @@ -4218,7 +4236,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI) + if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -4254,8 +4272,7 @@ static bool isLocalCall(const SDValue &Callee) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - return !G->getGlobal()->isDeclaration() && - !G->getGlobal()->isWeakForLinker(); + return G->getGlobal()->isStrongDefinitionForLinker(); return false; } @@ -4308,7 +4325,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, bool IsPatchPoint, - SelectionDAG &DAG, + bool hasNest, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, SDValue Chain, @@ -4321,8 +4338,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, IsPatchPoint, RegsToPass, - Ops, NodeTys, CS, Subtarget); + SPDiff, isTailCall, IsPatchPoint, hasNest, + RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) @@ -4381,7 +4398,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); @@ -4586,7 +4603,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, unsigned LocMemOffset = ByValVA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. 
@@ -4623,7 +4641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, if (!isTailCall) { SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), @@ -4664,7 +4683,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -4703,8 +4723,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); + bool hasNest = false; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); @@ -4758,6 +4779,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; + if (Flags.isNest()) + continue; + if (CallConv == CallingConv::Fast) { if (Flags.isByVal()) NumGPRsUsed += (Flags.getByValSize()+7)/8; @@ -5021,6 +5045,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. + RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); + hasNest = true; + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -5302,9 +5333,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, - RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, - NumBytes, Ins, InVals, CS); + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, + Callee, SPDiff, NumBytes, Ins, InVals, CS); } SDValue @@ -5320,7 +5351,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned NumOps = Outs.size(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 
8 : 4; @@ -5693,7 +5724,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -5764,7 +5796,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); @@ -5794,7 +5826,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5817,7 +5849,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5845,7 +5877,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // Negate the size. 
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); @@ -5888,8 +5920,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue BasePtr = LD->getBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); - SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, - BasePtr, MVT::i8, MMO); + SDValue NewLD = + DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, + BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; @@ -5913,7 +5946,8 @@ SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), + Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } @@ -6374,7 +6408,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -6419,7 +6453,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { @@ -6506,7 +6540,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { @@ -6727,7 +6761,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); assert(BVN->getNumOperands() == 4 && @@ -6760,9 +6794,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(), - 16 /* alignment */); - + SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), + 16 /* alignment */); + SmallVector<SDValue, 2> Ops; Ops.push_back(DAG.getEntryNode()); Ops.push_back(CPIdx); @@ -7453,7 +7487,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. 
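The recurring getPointerTy() to getPointerTy(DAG.getDataLayout()) edits in the hunks above all follow one pattern: the pointer type is now derived from the module's DataLayout instead of state cached on TargetLowering. A hedged sketch of the new call shape; the wrapper name and includes are assumptions, only the getPointerTy call itself reflects the patch:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

// Hypothetical convenience wrapper around the DataLayout-aware overload.
static llvm::MVT getPtrVT(const llvm::TargetLowering &TLI,
                          llvm::SelectionDAG &DAG, unsigned AddrSpace = 0) {
  return TLI.getPointerTy(DAG.getDataLayout(), AddrSpace);
}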
@@ -7499,7 +7533,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue StoreChain = DAG.getEntryNode(); @@ -7651,9 +7685,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < 4; ++Idx) { - SDValue Ex = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, - DAG.getConstant(Idx, dl, getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, + DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); SDValue Store; if (ScalarVT != ScalarMemVT) Store = @@ -7715,7 +7749,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SmallVector<SDValue, 2> Ops; @@ -7920,7 +7954,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); - EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); + EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); @@ -8248,7 +8283,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate @@ -8386,7 +8421,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -9032,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// +static std::string getRecipOp(const char *Base, EVT VT) { + std::string RecipOp(Base); + if (VT.getScalarType() == MVT::f64) + RecipOp += "d"; + else + RecipOp += "f"; + + if (VT.isVector()) + RecipOp = "vec-" + RecipOp; + + return RecipOp; +} + SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, @@ -9043,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. 
When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("sqrt", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } @@ -9066,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("div", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); @@ -9854,7 +9900,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); - EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode(ISD::SRA, dl, N->getValueType(0), @@ -10145,9 +10191,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); - unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy); + unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. @@ -10219,7 +10265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 2*MemVT.getStoreSize()-1); // Create the new base load. 
- SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy()); + SDValue LDXIntID = + DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, @@ -10243,7 +10290,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (!findConsecutiveLoad(LD, DAG)) --IncValue; - SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy()); + SDValue Increment = + DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = @@ -10691,7 +10739,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType -PPCTargetLowering::getConstraintType(const std::string &Constraint) const { +PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -10776,7 +10824,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( std::pair<unsigned, const TargetRegisterClass *> PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters @@ -10923,8 +10971,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. -bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // PPC does not allow r+i addressing modes for vectors! if (Ty->isVectorTy() && AM.BaseOffs != 0) @@ -10977,22 +11025,22 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); + auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), - FrameAddr, Offset), + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address off the stack. 
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo(), false, false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -11000,13 +11048,13 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - bool isPPC64 = PtrVT == MVT::i64; - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + bool isPPC64 = PtrVT == MVT::i64; + // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; @@ -11026,8 +11074,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned PPCTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 02242b512a4f..6e13533cfdb3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -423,7 +423,9 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } bool isCheapToSpeculateCttz() const override { return true; @@ -434,7 +436,8 @@ namespace llvm { } /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; /// Return true if target always beneficiates from combining into FMA for a /// given value type. This must typically return false on targets where FMA @@ -487,7 +490,8 @@ namespace llvm { SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, @@ -519,8 +523,7 @@ namespace llvm { MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. 
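Two API migrations run through the header changes above and the .cpp hunks: several TargetLowering hooks now take the DataLayout explicitly (getScalarShiftAmountTy, getSetCCResultType, isLegalAddressingMode), and the inline-asm constraint hooks take StringRef rather than const std::string&, so call sites no longer build a temporary string. A minimal sketch of the StringRef-based shape; the helper is illustrative and covers only the two codes visible in a later hunk of this header:

#include "llvm/ADT/StringRef.h"

// Illustrative check in the new StringRef style; the real hook maps each
// code to an InlineAsm constraint kind and handles more cases.
static bool isKnownMemConstraint(llvm::StringRef ConstraintCode) {
  return ConstraintCode == "es" || ConstraintCode == "o";
}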
@@ -529,13 +532,13 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(Type *Ty) const override; + unsigned getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. @@ -544,8 +547,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "es") return InlineAsm::Constraint_es; else if (ConstraintCode == "o") @@ -561,8 +564,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -745,7 +748,7 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, - bool isVarArg, bool IsPatchPoint, + bool isVarArg, bool IsPatchPoint, bool hasNest, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 696a83860e53..bf6e40296405 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden); +static cl::opt<bool> +UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, + cl::desc("Use the old (incorrect) instruction latency calculation")); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return new ScoreboardHazardRecognizer(II, DAG); } +unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (!ItinData || UseOldLatencyCalc) + return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); + + // The default implementation of getInstrLatency calls getStageLatency, but + // getStageLatency does not do the right thing for us. While we have + // itinerary, most cores are fully pipelined, and so the itineraries only + // express the first part of the pipeline, not every stage. Instead, we need + // to use the listed output operand cycle number (using operand 0 here, which + // is an output). 
+ + unsigned Latency = 1; + unsigned DefClass = MI->getDesc().getSchedClass(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + continue; + + int Cycle = ItinData->getOperandCycle(DefClass, i); + if (Cycle < 0) + continue; + + Latency = std::max(Latency, (unsigned) Cycle); + } + + return Latency; +} int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index e2d6346aa532..40badae644d6 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,10 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = nullptr) const override; + int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 43ba4994fde6..20c95fe888e0 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -989,6 +989,18 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, $B)>; +// Reciprocal estimate +def : Pat<(int_ppc_vsx_xvresp v4f32:$A), + (XVRESP $A)>; +def : Pat<(int_ppc_vsx_xvredp v2f64:$A), + (XVREDP $A)>; + +// Recip. square root estimate +def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), + (XVRSQRTESP $A)>; +def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), + (XVRSQRTEDP $A)>; + } // AddedComplexity } // HasVSX @@ -1013,6 +1025,9 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. v4i32:$XB)))]>; } // isCommutable + def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), + (XXLEQV $A, $B)>; + def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 656376c641aa..2b09b2f625de 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -165,8 +165,7 @@ void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); + const PPCFrameLowering *TFI = getFrameLowering(MF); // The ZERO register is not really a register, but the representation of r0 // when used in instructions that treat r0 as the constant 0. 
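The getInstrLatency override added above takes the largest listed def-operand cycle from the itinerary instead of summing stage latencies, because the P7/P8 itineraries only describe the front of the pipeline on fully pipelined cores. A small standalone sketch of that max-over-def-cycles idea; the function name and includes are assumptions, not part of the patch, and it assumes the defs occupy the first operand slots:

#include <algorithm>
#include "llvm/MC/MCInstrItineraries.h"

// Largest listed output-operand cycle for a scheduling class, defaulting
// to 1 when the itinerary carries no cycle information.
static unsigned maxDefOperandCycle(const llvm::InstrItineraryData &Itin,
                                   unsigned SchedClass, unsigned NumDefs) {
  unsigned Latency = 1;
  for (unsigned i = 0; i != NumDefs; ++i) {
    int Cycle = Itin.getOperandCycle(SchedClass, i);
    if (Cycle > 0)
      Latency = std::max(Latency, unsigned(Cycle));
  }
  return Latency;
}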
@@ -209,7 +208,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); - if (PPCFI->needsFP(MF)) + if (TFI->needsFP(MF)) Reserved.set(PPC::X31); if (hasBasePointer(MF)) @@ -230,7 +229,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } - if (PPCFI->needsFP(MF)) + if (TFI->needsFP(MF)) Reserved.set(PPC::R31); if (hasBasePointer(MF)) { @@ -256,8 +255,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const PPCFrameLowering *TFI = getFrameLowering(MF); const unsigned DefaultSafety = 1; switch (RC->getID()) { @@ -341,7 +339,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned FrameSize = MFI->getStackSize(); // Get stack alignments. - unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment(); + const PPCFrameLowering *TFI = getFrameLowering(MF); + unsigned TargetAlign = TFI->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); assert((maxCallFrameSize & (MaxAlign-1)) == 0 && "Maximum call-frame size not sufficiently aligned"); @@ -864,8 +863,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const PPCFrameLowering *TFI = getFrameLowering(MF); if (!TM.isPPC64()) return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; @@ -908,10 +906,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { } bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); + const PPCFrameLowering *TFI = getFrameLowering(MF); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -946,11 +944,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); - unsigned StackEst = - PPCFI->determineFrameLayout(MF, false, true); + const PPCFrameLowering *TFI = getFrameLowering(MF); + unsigned StackEst = TFI->determineFrameLayout(MF, false, true); // If we likely don't need a stack frame, then we probably don't need a // virtual base register either. 
@@ -1034,4 +1029,3 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, MI->getOpcode() == TargetOpcode::PATCHPOINT || (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } - diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td index 635d154d10bf..267f56726180 100644 --- a/lib/Target/PowerPC/PPCScheduleP7.td +++ b/lib/Target/PowerPC/PPCScheduleP7.td @@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td index 020739baec3a..69e6d05c6604 100644 --- a/lib/Target/PowerPC/PPCScheduleP8.td +++ b/lib/Target/PowerPC/PPCScheduleP8.td @@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries< P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FPU1, P8_FPU2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp deleted file mode 100644 index dc1674214769..000000000000 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PPCSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "PPCTargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "powerpc-selectiondag-info" - -PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - -PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {} diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h deleted file mode 100644 index 2c1378d5670d..000000000000 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PowerPC subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class PPCTargetMachine; - -class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit PPCSelectionDAGInfo(const DataLayout *DL); - ~PPCSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index cf603fe17723..58daccae90f2 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -53,7 +53,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {} + InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index e9cc3d4bd5bc..0616c1f65604 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -17,10 +17,10 @@ #include "PPCFrameLowering.h" #include "PPCISelLowering.h" #include "PPCInstrInfo.h" -#include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -129,7 +129,7 @@ protected: PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; PPCTargetLowering TLInfo; - PPCSelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; public: /// This constructor initializes the data members to match that @@ -164,7 +164,7 @@ public: const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } - const PPCSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const PPCRegisterInfo *getRegisterInfo() const override { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 074bc870751a..1daf244fed44 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), TLOF(createTLOF(getTargetTriple())), - TargetABI(computeTargetABI(TT, Options)) { + TargetABI(computeTargetABI(TT, Options)), + Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { + + // For the estimates, convergence is quadratic, so we essentially double the + // number of digits correct after every iteration. For both FRE and FRSQRTE, + // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), + // this is 2^-14. IEEE float has 23 digits and double has 52 digits. + unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3, + RefinementSteps64 = RefinementSteps + 1; + + this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); + + this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); + initAsmInfo(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 5c0f7e629a69..6496339519a1 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -29,6 +29,8 @@ public: private: std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; + PPCSubtarget Subtarget; + mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; public: diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 25d563a7d975..e21c2b77f4d7 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -317,7 +317,7 @@ unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 35e7a1497c83..368bef93f0dd 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -38,7 +38,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> { public: explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. PPCTTIImpl(const PPCTTIImpl &Arg) @@ -46,18 +47,6 @@ public: PPCTTIImpl(PPCTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - PPCTTIImpl &operator=(const PPCTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - PPCTTIImpl &operator=(PPCTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f352fa647ace..58d3c3d3fa2e 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -136,6 +136,16 @@ protected: // source of the copy, it must still be live here. We can't use // interval testing for a physical register, so as long as we're // walking the MIs we may as well test liveness here. + // + // FIXME: There is a case that occurs in practice, like this: + // %vreg9<def> = COPY %F1; VSSRC:%vreg9 + // ... 
+ // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9 + // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9 + // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC: + // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC: + // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC: + // which prevents an otherwise-profitable transformation. bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); J != JE; --J) { diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index e7ab71ac2106..3fb1dcc3d4af 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -80,6 +80,7 @@ struct PPCVSXSwapEntry { unsigned int IsSwap : 1; unsigned int MentionsPhysVR : 1; unsigned int IsSwappable : 1; + unsigned int MentionsPartialVR : 1; unsigned int SpecialHandling : 3; unsigned int WebRejected : 1; unsigned int WillRemove : 1; @@ -91,7 +92,9 @@ enum SHValues { SH_INSERT, SH_NOSWAP_LD, SH_NOSWAP_ST, - SH_SPLAT + SH_SPLAT, + SH_XXPERMDI, + SH_COPYSCALAR }; struct PPCVSXSwapRemoval : public MachineFunctionPass { @@ -167,6 +170,21 @@ private: isRegInClass(Reg, &PPC::VRRCRegClass)); } + // Return true iff the given register is a partial vector register. + bool isScalarVecReg(unsigned Reg) { + return (isRegInClass(Reg, &PPC::VSFRCRegClass) || + isRegInClass(Reg, &PPC::VSSRCRegClass)); + } + + // Return true iff the given register mentions all or part of a + // vector register. Also sets Partial to true if the mention + // is for just the floating-point register overlap of the register. + bool isAnyVecReg(unsigned Reg, bool &Partial) { + if (isScalarVecReg(Reg)) + Partial = true; + return isScalarVecReg(Reg) || isVecReg(Reg); + } + public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { @@ -223,12 +241,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineInstr &MI : MBB) { bool RelevantInstr = false; + bool Partial = false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (isVecReg(Reg)) { + if (isAnyVecReg(Reg, Partial)) { RelevantInstr = true; break; } @@ -250,8 +269,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // Unless noted otherwise, an instruction is considered // safe for the optimization. There are a large number of // such true-SIMD instructions (all vector math, logical, - // select, compare, etc.). - SwapVector[VecIdx].IsSwappable = 1; + // select, compare, etc.). However, if the instruction + // mentions a partial vector register and does not have + // special handling defined, it is not swappable. + if (Partial) + SwapVector[VecIdx].MentionsPartialVR = 1; + else + SwapVector[VecIdx].IsSwappable = 1; break; case PPC::XXPERMDI: { // This is a swap if it is of the form XXPERMDI t, s, s, 2. @@ -269,25 +293,37 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { VecIdx); if (trueReg1 == trueReg2) SwapVector[VecIdx].IsSwap = 1; - } + else { + // We can still handle these if the two registers are not + // identical, by adjusting the form of the XXPERMDI. + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; + } // This is a doubleword splat if it is of the form // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we // must look through chains of copy-likes to find the source // register. 
We turn off the marking for mention of a physical // register, because splatting it is safe; the optimization - // will not swap the value in the physical register. - else if (immed == 0 || immed == 3) { + // will not swap the value in the physical register. Whether + // or not the two input registers are identical, we can handle + // these by adjusting the form of the XXPERMDI. + } else if (immed == 0 || immed == 3) { + + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; + unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), VecIdx); unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), VecIdx); - if (trueReg1 == trueReg2) { - SwapVector[VecIdx].IsSwappable = 1; + if (trueReg1 == trueReg2) SwapVector[VecIdx].MentionsPhysVR = 0; - } + + } else { + // We can still handle these by adjusting the form of the XXPERMDI. + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; } - // Any other form of XXPERMDI is lane-sensitive and unsafe - // for the optimization. break; } case PPC::LVX: @@ -324,7 +360,32 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (isVecReg(MI.getOperand(0).getReg()) && isVecReg(MI.getOperand(1).getReg())) SwapVector[VecIdx].IsSwappable = 1; + // If we have a copy from one scalar floating-point register + // to another, we can accept this even if it is a physical + // register. The only way this gets involved is if it feeds + // a SUBREG_TO_REG, which is handled by introducing a swap. + else if (isScalarVecReg(MI.getOperand(0).getReg()) && + isScalarVecReg(MI.getOperand(1).getReg())) + SwapVector[VecIdx].IsSwappable = 1; + break; + case PPC::SUBREG_TO_REG: { + // These are fine provided they are moving between full vector + // register classes. If they are moving from a scalar + // floating-point class to a vector class, we can handle those + // as well, provided we introduce a swap. It is generally the + // case that we will introduce fewer swaps than we remove, but + // (FIXME) a cost model could be used. However, introduced + // swaps could potentially be CSEd, so this is not trivial. + if (isVecReg(MI.getOperand(0).getReg()) && + isVecReg(MI.getOperand(2).getReg())) + SwapVector[VecIdx].IsSwappable = 1; + else if (isVecReg(MI.getOperand(0).getReg()) && + isScalarVecReg(MI.getOperand(2).getReg())) { + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR; + } break; + } case PPC::VSPLTB: case PPC::VSPLTH: case PPC::VSPLTW: @@ -425,6 +486,10 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VUPKLSW: case PPC::XXMRGHW: case PPC::XXMRGLW: + // XXSLDWI could be replaced by a general permute with one of three + // permute control vectors (for shift values 1, 2, 3). However, + // VPERM has a more restrictive register class. + case PPC::XXSLDWI: case PPC::XXSPLTW: break; } @@ -501,18 +566,20 @@ void PPCVSXSwapRemoval::formWebs() { DEBUG(MI->dump()); // It's sufficient to walk vector uses and join them to their unique - // definitions. In addition, check *all* vector register operands - // for physical regs. + // definitions. In addition, check full vector register operands + // for physical regs. We exclude partial-vector register operands + // because we can handle them if copied to a full vector. 
for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!isVecReg(Reg)) + if (!isVecReg(Reg) && !isScalarVecReg(Reg)) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - SwapVector[EntryIdx].MentionsPhysVR = 1; + if (!(MI->isCopy() && isScalarVecReg(Reg))) + SwapVector[EntryIdx].MentionsPhysVR = 1; continue; } @@ -545,15 +612,21 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId); - // Reject webs containing mentions of physical registers, or containing - // operations that we don't know how to handle in a lane-permuted region. + // If representative is already rejected, don't waste further time. + if (SwapVector[Repr].WebRejected) + continue; + + // Reject webs containing mentions of physical or partial registers, or + // containing operations that we don't know how to handle in a lane- + // permuted region. if (SwapVector[EntryIdx].MentionsPhysVR || + SwapVector[EntryIdx].MentionsPartialVR || !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) { SwapVector[Repr].WebRejected = 1; DEBUG(dbgs() << - format("Web %d rejected for physreg, subreg, or not swap[pable]\n", + format("Web %d rejected for physreg, partial reg, or not swap[pable]\n", Repr)); DEBUG(dbgs() << " in " << EntryIdx << ": "); DEBUG(SwapVector[EntryIdx].VSEMI->dump()); @@ -588,7 +661,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { } } - // Reject webs than contain swapping stores that are fed by something + // Reject webs that contain swapping stores that are fed by something // other than a swap instruction. } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; @@ -670,7 +743,8 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { // The identified swap entry requires special handling to allow its // containing computation to be optimized. Perform that handling // here. -// FIXME: This code is to be phased in with subsequent patches. +// FIXME: Additional opportunities will be phased in with subsequent +// patches. void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { switch (SwapVector[EntryIdx].SpecialHandling) { @@ -704,6 +778,91 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { break; } + // For an XXPERMDI that isn't handled otherwise, we need to + // reverse the order of the operands. If the selector operand + // has a value of 0 or 3, we need to change it to 3 or 0, + // respectively. Otherwise we should leave it alone. (This + // is equivalent to reversing the two bits of the selector + // operand and complementing the result.) + case SHValues::SH_XXPERMDI: { + MachineInstr *MI = SwapVector[EntryIdx].VSEMI; + + DEBUG(dbgs() << "Changing XXPERMDI: "); + DEBUG(MI->dump()); + + unsigned Selector = MI->getOperand(3).getImm(); + if (Selector == 0 || Selector == 3) + Selector = 3 - Selector; + MI->getOperand(3).setImm(Selector); + + unsigned Reg1 = MI->getOperand(1).getReg(); + unsigned Reg2 = MI->getOperand(2).getReg(); + MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setReg(Reg1); + + DEBUG(dbgs() << " Into: "); + DEBUG(MI->dump()); + break; + } + + // For a copy from a scalar floating-point register to a vector + // register, removing swaps will leave the copied value in the + // wrong lane. Insert a swap following the copy to fix this. 
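The parenthetical above (reverse the two selector bits, then complement) can be checked directly against the SH_XXPERMDI code: 0 maps to 3, 3 maps to 0, and 1 and 2 map to themselves, which is exactly what the Selector = 3 - Selector guard implements once the operands have been swapped. A small illustrative check; the function name is made up:

// Not part of the patch: reverse the two selector bits and complement
// them modulo 4.
static unsigned reversedComplementedSelector(unsigned Sel) {
  unsigned Reversed = ((Sel & 1u) << 1) | ((Sel >> 1) & 1u);
  return ~Reversed & 3u;   // 0 <-> 3, while 1 and 2 are fixed points
}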
+ case SHValues::SH_COPYSCALAR: { + MachineInstr *MI = SwapVector[EntryIdx].VSEMI; + + DEBUG(dbgs() << "Changing SUBREG_TO_REG: "); + DEBUG(MI->dump()); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + + MI->getOperand(0).setReg(NewVReg); + DEBUG(dbgs() << " Into: "); + DEBUG(MI->dump()); + + MachineBasicBlock::iterator InsertPoint = MI->getNextNode(); + + // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG + // is copying to a VRRC, we need to be careful to avoid a register + // assignment problem. In this case we must copy from VRRC to VSRC + // prior to the swap, and from VSRC to VRRC following the swap. + // Coalescing will usually remove all this mess. + + if (DstRC == &PPC::VRRCRegClass) { + unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::COPY), VSRCTmp1) + .addReg(NewVReg); + DEBUG(MI->getNextNode()->dump()); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::XXPERMDI), VSRCTmp2) + .addReg(VSRCTmp1) + .addReg(VSRCTmp1) + .addImm(2); + DEBUG(MI->getNextNode()->getNextNode()->dump()); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::COPY), DstReg) + .addReg(VSRCTmp2); + DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump()); + + } else { + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::XXPERMDI), DstReg) + .addReg(NewVReg) + .addReg(NewVReg) + .addImm(2); + + DEBUG(MI->getNextNode()->dump()); + } + break; + } } } @@ -756,6 +915,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() { DEBUG(dbgs() << "swap "); if (SwapVector[EntryIdx].MentionsPhysVR) DEBUG(dbgs() << "physreg "); + if (SwapVector[EntryIdx].MentionsPartialVR) + DEBUG(dbgs() << "partialreg "); if (SwapVector[EntryIdx].IsSwappable) { DEBUG(dbgs() << "swappable "); @@ -780,6 +941,12 @@ void PPCVSXSwapRemoval::dumpSwapVector() { case SH_SPLAT: DEBUG(dbgs() << "special:splat "); break; + case SH_XXPERMDI: + DEBUG(dbgs() << "special:xxpermdi "); + break; + case SH_COPYSCALAR: + DEBUG(dbgs() << "special:copyscalar "); + break; } } diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index c486411f9a1e..5b7bfdd28020 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_target(SparcCodeGen SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp - SparcSelectionDAGInfo.cpp SparcMCInstLower.cpp SparcTargetObjectFile.cpp ) diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 91d2eeef0cc0..9113e4a46b96 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -57,7 +57,7 @@ static MCInstrInfo *createSparcMCInstrInfo() { return X; } -static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createSparcMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitSparcMCRegisterInfo(X, SP::O7); return X; @@ -65,11 +65,9 @@ static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); if (CPU.empty()) CPU = (TT.getArch() == 
Triple::sparcv9) ? "v9" : "v8"; - InitSparcMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createSparcMCSubtargetInfoImpl(TT, CPU, FS); } // Code models. Some only make sense for 64-bit code. @@ -83,7 +81,8 @@ createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { // // All code models require that the text segment is smaller than 2GB. -static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createSparcMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); @@ -100,7 +99,8 @@ static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createSparcV9MCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index bccc6bdd53eb..8fa10dcae114 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -190,11 +190,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; for (unsigned reg = SP::L0; reg <= SP::L7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; return true; @@ -206,10 +206,10 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - return !(MFI->hasCalls() // has calls - || MRI.isPhysRegUsed(SP::L0) // Too many registers needed - || MRI.isPhysRegUsed(SP::O6) // %SP is used - || hasFP(MF)); // need %FP + return !(MFI->hasCalls() // has calls + || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed + || !MRI.reg_nodbg_empty(SP::O6) // %SP is used + || hasFP(MF)); // need %FP } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { @@ -218,16 +218,13 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { // Remap %i[0-7] to %o[0-7]. for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { - if (!MRI.isPhysRegUsed(reg)) + if (MRI.reg_nodbg_empty(reg)) continue; unsigned mapped_reg = (reg - SP::I0 + SP::O0); - assert(!MRI.isPhysRegUsed(mapped_reg)); + assert(MRI.reg_nodbg_empty(mapped_reg)); // Replace I register with O register. MRI.replaceRegWith(reg, mapped_reg); - - // Mark the reg unused. - MRI.setPhysRegUnused(reg); } // Rewrite MBB's Live-ins. 
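The Sparc leaf-procedure checks above replace MRI.isPhysRegUsed(Reg) with !MRI.reg_nodbg_empty(Reg), i.e. "this physical register has at least one non-debug def or use in the function". A minimal sketch of the predicate; the helper name and include are assumptions:

#include "llvm/CodeGen/MachineRegisterInfo.h"

// True when Reg appears in at least one non-debug operand of the function.
static bool physRegHasNonDebugUses(const llvm::MachineRegisterInfo &MRI,
                                   unsigned Reg) {
  return !MRI.reg_nodbg_empty(Reg);
}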
@@ -247,9 +244,10 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { #endif } -void SparcFrameLowering::processFunctionBeforeCalleeSavedScan - (MachineFunction &MF, RegScavenger *RS) const { - +void SparcFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); if (!DisableLeafProc && isLeafProc(MF)) { SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>(); MFI->setLeafProc(true); diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index bb3b78861cbd..29fc7b7ba036 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -36,8 +36,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; private: // Remap input registers to output registers for leaf procedure. diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 9c594a9f0f65..340b72e7940f 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -67,13 +67,16 @@ private: SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + TLI->getPointerTy(CurDAG->getDataLayout())) + .getNode(); } bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); return true; } @@ -88,8 +91,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. - Base = - CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); } else { Base = Addr.getOperand(0); } @@ -134,7 +137,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy()); + R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy(CurDAG->getDataLayout())); return true; } @@ -168,10 +171,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { } else { TopPart = CurDAG->getRegister(SP::G0, MVT::i32); } - TopPart = SDValue(CurDAG->getMachineNode(SP::WRASRrr, dl, MVT::i32, - TopPart, - CurDAG->getRegister(SP::G0, MVT::i32)), 0); - TopPart = CurDAG->getCopyToReg(TopPart, dl, SP::Y, TopPart, SDValue()).getValue(1); + TopPart = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SP::Y, TopPart, + SDValue()) + .getValue(1); // FIXME: Handle div by immediate. unsigned Opcode = N->getOpcode() == ISD::SDIV ? 
SP::SDIVrr : SP::UDIVrr; @@ -184,12 +186,11 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDValue MulLHS = N->getOperand(0); SDValue MulRHS = N->getOperand(1); unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr; - SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, - MulLHS, MulRHS); - // The high part is in the Y register. - return CurDAG->SelectNodeTo(N, SP::RDASR, MVT::i32, - CurDAG->getRegister(SP::Y, MVT::i32), - SDValue(Mul, 1)); + SDNode *Mul = + CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, MulLHS, MulRHS); + SDValue ResultHigh = SDValue(Mul, 1); + ReplaceUses(SDValue(N, 0), ResultHigh); + return nullptr; } } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 0481676dc1ac..4879d4ee79e5 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -221,10 +221,11 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, PtrVT); Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); + RetOps.push_back(DAG.getRegister(SP::I0, PtrVT)); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } @@ -418,6 +419,7 @@ LowerFormalArguments_32(SDValue Chain, assert(VA.isMemLoc()); unsigned Offset = VA.getLocMemOffset()+StackOffset; + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (VA.needsCustom()) { assert(VA.getValVT() == MVT::f64); @@ -426,7 +428,7 @@ LowerFormalArguments_32(SDValue Chain, int FI = MF.getFrameInfo()->CreateFixedObject(8, Offset, true); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT); SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo(), false,false, false, 0); @@ -437,14 +439,14 @@ LowerFormalArguments_32(SDValue Chain, int FI = MF.getFrameInfo()->CreateFixedObject(4, Offset, true); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT); SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(), false, false, false, 0); int FI2 = MF.getFrameInfo()->CreateFixedObject(4, Offset+4, true); - SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy()); + SDValue FIPtr2 = DAG.getFrameIndex(FI2, PtrVT); SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2, MachinePointerInfo(), @@ -460,7 +462,7 @@ LowerFormalArguments_32(SDValue Chain, int FI = MF.getFrameInfo()->CreateFixedObject(4, Offset, true); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT); SDValue Load ; if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) { Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, @@ -607,10 +609,10 @@ LowerFormalArguments_64(SDValue Chain, if (VA.isExtInLoc()) Offset += 8 - ValSize; int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true); - InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, - DAG.getFrameIndex(FI, getPointerTy()), - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0)); + InVals.push_back(DAG.getLoad( + VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), + MachinePointerInfo::getFixedStack(FI), 
false, false, false, 0)); } if (!IsVarArg) @@ -637,10 +639,10 @@ LowerFormalArguments_64(SDValue Chain, unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass); SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); - OutChains.push_back(DAG.getStore(Chain, DL, VArg, - DAG.getFrameIndex(FI, getPointerTy()), - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + auto PtrVT = getPointerTy(MF.getDataLayout()); + OutChains.push_back( + DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT), + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } if (!OutChains.empty()) @@ -722,7 +724,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, unsigned Align = Flags.getByValAlign(); int FI = MFI->CreateStackObject(Size, Align, false); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32); Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, @@ -993,7 +995,7 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); Type *ElementTy = Ty->getElementType(); - return getDataLayout()->getTypeAllocSize(ElementTy); + return DAG.getDataLayout().getTypeAllocSize(ElementTy); } @@ -1057,6 +1059,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Sparc target does not yet support tail call optimization. CLI.IsTailCall = false; @@ -1130,13 +1133,11 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Store and reload into the interger register reg and reg+1. unsigned Offset = 8 * (VA.getLocReg() - SP::I0); unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128; - SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT); SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset, DL); - HiPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, - HiPtrOff); + HiPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, HiPtrOff); SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8, DL); - LoPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, - LoPtrOff); + LoPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, LoPtrOff); // Store to %sp+BIAS+128+Offset SDValue Store = DAG.getStore(Chain, DL, Arg, HiPtrOff, @@ -1180,13 +1181,13 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, assert(VA.isMemLoc()); // Create a store off the stack pointer for this argument. - SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT); // The argument area starts at %fp+BIAS+128 in the callee frame, // %sp+BIAS+128 in ours. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + Subtarget->getStackPointerBias() + 128, DL); - PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); @@ -1215,10 +1216,9 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, unsigned TF = ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 
SparcMCExpr::VK_Sparc_WPLT30 : 0); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, - TF); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), TF); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, TF); // Build the operands for the call instruction itself. SmallVector<SDValue, 8> Ops; @@ -1370,6 +1370,8 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, const SparcSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + auto &DL = *TM.getDataLayout(); + // Set up the register classes. addRegisterClass(MVT::i32, &SP::IntRegsRegClass); addRegisterClass(MVT::f32, &SP::FPRegsRegClass); @@ -1394,10 +1396,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::f128, MVT::f64, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. - setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); - setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom); - setOperationAction(ISD::ConstantPool, getPointerTy(), Custom); - setOperationAction(ISD::BlockAddress, getPointerTy(), Custom); + setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom); + setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom); + setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom); + setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -1704,7 +1706,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -1804,7 +1807,7 @@ SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, // or ExternalSymbol SDNode. SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - EVT VT = getPointerTy(); + EVT VT = getPointerTy(DAG.getDataLayout()); // Handle PIC mode first. if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { @@ -1871,7 +1874,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = getTargetMachine().getTLSModel(GV); @@ -1983,7 +1986,7 @@ SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args, if (ArgTy->isFP128Ty()) { // Create a stack object and pass the pointer to the library function. 
int FI = MFI->CreateStackObject(16, 8, false); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, DL, Entry.Node, @@ -2008,8 +2011,9 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, ArgListTy Args; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - SDValue Callee = DAG.getExternalSymbol(LibFuncName, getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT); Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); Type *RetTyABI = RetTy; SDValue Chain = DAG.getEntryNode(); @@ -2019,7 +2023,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, // Create a Stack Object to receive the return value of type f128. ArgListEntry Entry; int RetFI = MFI->CreateStackObject(16, 8, false); - RetPtr = DAG.getFrameIndex(RetFI, getPointerTy()); + RetPtr = DAG.getFrameIndex(RetFI, PtrVT); Entry.Node = RetPtr; Entry.Ty = PointerType::getUnqual(RetTy); if (!Subtarget->is64Bit()) @@ -2082,7 +2086,8 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break; } - SDValue Callee = DAG.getExternalSymbol(LibCall, getPointerTy()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Callee = DAG.getExternalSymbol(LibCall, PtrVT); Type *RetTy = Type::getInt32Ty(*DAG.getContext()); ArgListTy Args; SDValue Chain = DAG.getEntryNode(); @@ -2362,6 +2367,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, const SparcTargetLowering &TLI) { MachineFunction &MF = DAG.getMachineFunction(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // Need frame address to find the address of VarArgsFrameIndex. MF.getFrameInfo()->setFrameAddressIsTaken(true); @@ -2370,9 +2376,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, // memory location argument. SDLoc DL(Op); SDValue Offset = - DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), - DAG.getRegister(SP::I6, TLI.getPointerTy()), - DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); + DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(SP::I6, PtrVT), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); @@ -2497,8 +2502,8 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, SDValue RetAddr; if (depth == 0) { - unsigned RetReg = MF.addLiveIn(SP::I7, - TLI.getRegClassFor(TLI.getPointerTy())); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + unsigned RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT)); RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT); return RetAddr; } @@ -3065,7 +3070,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
SparcTargetLowering::ConstraintType -SparcTargetLowering::getConstraintType(const std::string &Constraint) const { +SparcTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -3139,7 +3144,7 @@ LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass *> SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index b6bc3d255713..bbc91a493c9d 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -72,7 +72,7 @@ namespace llvm { const char *getTargetNodeName(unsigned Opcode) const override; - ConstraintType getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override; @@ -82,14 +82,16 @@ namespace llvm { SelectionDAG &DAG) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; SDValue LowerFormalArguments(SDValue Chain, diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td index 670e9e989c81..25cc652dbd9e 100644 --- a/lib/Target/Sparc/SparcInstrAliases.td +++ b/lib/Target/Sparc/SparcInstrAliases.td @@ -245,6 +245,7 @@ multiclass fp_cond_alias<string cond, int condVal> { } defm : int_cond_alias<"a", 0b1000>; +defm : int_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual defm : int_cond_alias<"n", 0b0000>; defm : int_cond_alias<"ne", 0b1001>; defm : int_cond_alias<"nz", 0b1001>; // same as ne @@ -266,6 +267,7 @@ defm : int_cond_alias<"vc", 0b1111>; defm : int_cond_alias<"vs", 0b0111>; defm : fp_cond_alias<"a", 0b0000>; +defm : fp_cond_alias<"", 0b0000>; // same as a; gnu asm, not in manual defm : fp_cond_alias<"n", 0b1000>; defm : fp_cond_alias<"u", 0b0111>; defm : fp_cond_alias<"g", 0b0110>; @@ -284,7 +286,16 @@ defm : fp_cond_alias<"le", 0b1101>; defm : fp_cond_alias<"ule", 0b1110>; defm : fp_cond_alias<"o", 0b1111>; -// Instruction aliases for JMPL. +// Section A.3 Synthetic Instructions + +// Most are marked as Emit=0, so that they are not used for disassembly. This is +// an aesthetic issue, but the chosen policy is to typically prefer using the +// non-alias form, except for the most obvious and clarifying aliases: cmp, jmp, +// call, tst, ret, retl. + +// Note: cmp is handled in SparcInstrInfo. 
+// jmp/call/ret/retl have special case handling for output in +// SparcInstPrinter.cpp // jmp addr -> jmpl addr, %g0 def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr), 0>; @@ -294,25 +305,129 @@ def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr), 0>; def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr), 0>; def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr), 0>; -// retl -> RETL 8 -def : InstAlias<"retl", (RETL 8)>; +// tst reg -> orcc %g0, reg, %g0 +def : InstAlias<"tst $rs2", (ORCCrr G0, IntRegs:$rs2, G0)>; -// ret -> RET 8 +// ret -> jmpl %i7+8, %g0 (aka RET 8) def : InstAlias<"ret", (RET 8)>; -// mov reg, rd -> or %g0, reg, rd -def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>; +// retl -> jmpl %o7+8, %g0 (aka RETL 8) +def : InstAlias<"retl", (RETL 8)>; -// mov simm13, rd -> or %g0, simm13, rd -def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>; +// restore -> restore %g0, %g0, %g0 +def : InstAlias<"restore", (RESTORErr G0, G0, G0)>; + +// save -> restore %g0, %g0, %g0 +def : InstAlias<"save", (SAVErr G0, G0, G0)>; // set value, rd // (turns into a sequence of sethi+or, depending on the value) // def : InstAlias<"set $val, $rd", (ORri IntRegs:$rd, (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; def SET : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "set $val, $rd">; -// restore -> restore %g0, %g0, %g0 -def : InstAlias<"restore", (RESTORErr G0, G0, G0)>; +// not rd -> xnor rd, %g0, rd +def : InstAlias<"not $rd", (XNORrr IntRegs:$rd, IntRegs:$rd, G0), 0>; + +// not reg, rd -> xnor reg, %g0, rd +def : InstAlias<"not $rs1, $rd", (XNORrr IntRegs:$rd, IntRegs:$rs1, G0), 0>; + +// neg rd -> sub %g0, rd, rd +def : InstAlias<"neg $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rd), 0>; + +// neg reg, rd -> sub %g0, reg, rd +def : InstAlias<"neg $rs2, $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rs2), 0>; + +// inc rd -> add rd, 1, rd +def : InstAlias<"inc $rd", (ADDri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// inc simm13, rd -> add rd, simm13, rd +def : InstAlias<"inc $simm13, $rd", (ADDri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// inccc rd -> addcc rd, 1, rd +def : InstAlias<"inccc $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// inccc simm13, rd -> addcc rd, simm13, rd +def : InstAlias<"inccc $simm13, $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// dec rd -> sub rd, 1, rd +def : InstAlias<"dec $rd", (SUBri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// dec simm13, rd -> sub rd, simm13, rd +def : InstAlias<"dec $simm13, $rd", (SUBri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// deccc rd -> subcc rd, 1, rd +def : InstAlias<"deccc $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// deccc simm13, rd -> subcc rd, simm13, rd +def : InstAlias<"deccc $simm13, $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// btst reg_or_imm, reg -> andcc reg,reg_or_imm,%g0 +def : InstAlias<"btst $rs2, $rs1", (ANDCCrr G0, IntRegs:$rs1, IntRegs:$rs2), 0>; +def : InstAlias<"btst $simm13, $rs1", (ANDCCri G0, IntRegs:$rs1, i32imm:$simm13), 0>; + +// bset reg_or_imm, rd -> or rd,reg_or_imm,rd +def : InstAlias<"bset $rs2, $rd", (ORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>; +def : InstAlias<"bset $simm13, $rd", (ORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// bclr reg_or_imm, rd -> andn rd,reg_or_imm,rd +def : InstAlias<"bclr $rs2, $rd", (ANDNrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>; +def : InstAlias<"bclr $simm13, $rd", (ANDNri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// btog reg_or_imm, rd 
-> xor rd,reg_or_imm,rd +def : InstAlias<"btog $rs2, $rd", (XORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>; +def : InstAlias<"btog $simm13, $rd", (XORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + + +// clr rd -> or %g0, %g0, rd +def : InstAlias<"clr $rd", (ORrr IntRegs:$rd, G0, G0), 0>; + +// clr{b,h,} [addr] -> st{b,h,} %g0, [addr] +def : InstAlias<"clrb [$addr]", (STBrr MEMrr:$addr, G0), 0>; +def : InstAlias<"clrb [$addr]", (STBri MEMri:$addr, G0), 0>; +def : InstAlias<"clrh [$addr]", (STHrr MEMrr:$addr, G0), 0>; +def : InstAlias<"clrh [$addr]", (STHri MEMri:$addr, G0), 0>; +def : InstAlias<"clr [$addr]", (STrr MEMrr:$addr, G0), 0>; +def : InstAlias<"clr [$addr]", (STri MEMri:$addr, G0), 0>; + + +// mov reg_or_imm, rd -> or %g0, reg_or_imm, rd +def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>; +def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>; + +// mov specialreg, rd -> rd specialreg, rd +def : InstAlias<"mov $asr, $rd", (RDASR IntRegs:$rd, ASRRegs:$asr), 0>; +def : InstAlias<"mov %psr, $rd", (RDPSR IntRegs:$rd), 0>; +def : InstAlias<"mov %wim, $rd", (RDWIM IntRegs:$rd), 0>; +def : InstAlias<"mov %tbr, $rd", (RDTBR IntRegs:$rd), 0>; + +// mov reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg +def : InstAlias<"mov $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>; +def : InstAlias<"mov $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>; +def : InstAlias<"mov $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>; +def : InstAlias<"mov $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>; + +// End of Section A.3 + +// wr reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg +// (aka: omit the first arg when it's g0. 
This is not in the manual, but is +// supported by gnu and solaris as) +def : InstAlias<"wr $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>; +def : InstAlias<"wr $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>; +def : InstAlias<"wr $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>; +def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>; + + +// flush -> flush %g0 +def : InstAlias<"flush", (FLUSH), 0>; + def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index f87cee43e319..6167c532db80 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -324,6 +324,15 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, numSubRegs = 4; movOpc = SP::FMOVS; } + } else if (SP::ASRRegsRegClass.contains(DestReg) && + SP::IntRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(SP::WRASRrr), DestReg) + .addReg(SP::G0) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (SP::IntRegsRegClass.contains(DestReg) && + SP::ASRRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(SP::RDASR), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else llvm_unreachable("Impossible reg-to-reg copy"); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index a02bae07a336..3b9e048ea8b3 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -536,6 +536,7 @@ let Defs = [ICC] in let Uses = [ICC] in defm SUBC : F3_12np <"subx", 0b001100>; +// cmp (from Section A.3) is a specialized alias for subcc let Defs = [ICC], rd = 0 in { def CMPrr : F3_1<2, 0b010100, (outs), (ins IntRegs:$rs1, IntRegs:$rs2), @@ -559,12 +560,12 @@ let Defs = [Y, ICC] in { } // Section B.19 - Divide Instructions, p. 115 -let Defs = [Y] in { +let Uses = [Y], Defs = [Y] in { defm UDIV : F3_12np<"udiv", 0b001110>; defm SDIV : F3_12np<"sdiv", 0b001111>; } -let Defs = [Y, ICC] in { +let Uses = [Y], Defs = [Y, ICC] in { defm UDIVCC : F3_12np<"udivcc", 0b011110>; defm SDIVCC : F3_12np<"sdivcc", 0b011111>; } @@ -828,6 +829,20 @@ let rd = 0 in def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22), "unimp $imm22", []>; +// Section B.32 - Flush Instruction Memory +let rd = 0 in { + def FLUSHrr : F3_1<2, 0b111011, (outs), (ins MEMrr:$addr), + "flush $addr", []>; + def FLUSHri : F3_2<2, 0b111011, (outs), (ins MEMri:$addr), + "flush $addr", []>; + + // The no-arg FLUSH is only here for the benefit of the InstAlias + // "flush", which cannot seem to use FLUSHrr, due to the inability + // to construct a MEMrr with fixed G0 registers. + let rs1 = 0, rs2 = 0 in + def FLUSH : F3_1<2, 0b111011, (outs), (ins), "flush %g0", []>; +} + // Section B.33 - Floating-point Operate (FPop) Instructions // Convert Integer to Floating-point Instructions, p. 
141 diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index e504da4d3b21..db8a7e86962d 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -249,4 +249,6 @@ def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>; // Ancillary state registers def ASRRegs : RegisterClass<"SP", [i32], 32, - (add Y, (sequence "ASR%u", 1, 31))>; + (add Y, (sequence "ASR%u", 1, 31))> { + let isAllocatable = 0; +} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp deleted file mode 100644 index a308fc5e739e..000000000000 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SparcSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "SparcSelectionDAGInfo.h" -using namespace llvm; - -#define DEBUG_TYPE "sparc-selectiondag-info" - -SparcSelectionDAGInfo::SparcSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) { -} - -SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { -} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h deleted file mode 100644 index 6818291b30b4..000000000000 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Sparc subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class SparcTargetMachine; - -class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit SparcSelectionDAGInfo(const DataLayout &DL); - ~SparcSelectionDAGInfo() override; -}; - -} - -#endif diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index 479b25d2723f..d69da409e428 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -54,7 +54,7 @@ SparcSubtarget::SparcSubtarget(const Triple &TT, const std::string &CPU, bool is64Bit) : SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()), FrameLowering(*this) {} + FrameLowering(*this) {} int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 983b1193975d..9d21911d88f0 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -17,9 +17,9 @@ #include "SparcFrameLowering.h" #include "SparcInstrInfo.h" #include "SparcISelLowering.h" -#include "SparcSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -39,7 +39,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool UsePopc; SparcInstrInfo InstrInfo; SparcTargetLowering TLInfo; - SparcSelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; public: @@ -56,7 +56,7 @@ public: const SparcTargetLowering *getTargetLowering() const override { return &TLInfo; } - const SparcSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 81882106fc46..5fefa315a4cf 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -148,7 +148,7 @@ static MCInstrInfo *createSystemZMCInstrInfo() { return X; } -static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitSystemZMCRegisterInfo(X, SystemZ::R14D); return X; @@ -156,12 +156,11 @@ static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitSystemZMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createSystemZMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index a636b35635ce..397de472a6ee 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ 
b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -61,11 +61,12 @@ SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { return SpillOffsetTable; } -void SystemZFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool HasFP = hasFP(MF); SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); @@ -77,17 +78,17 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // argument register R6D. if (IsVarArg) for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) - MRI.setPhysRegUsed(SystemZ::ArgGPRs[I]); + SavedRegs.set(SystemZ::ArgGPRs[I]); // If the function requires a frame pointer, record that the hard // frame pointer will be clobbered. if (HasFP) - MRI.setPhysRegUsed(SystemZ::R11D); + SavedRegs.set(SystemZ::R11D); // If the function calls other functions, record that the return // address register will be clobbered. if (MFFrame->hasCalls()) - MRI.setPhysRegUsed(SystemZ::R14D); + SavedRegs.set(SystemZ::R14D); // If we are saving GPRs other than the stack pointer, we might as well // save and restore the stack pointer at the same time, via STMG and LMG. @@ -96,8 +97,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); for (unsigned I = 0; CSRegs[I]; ++I) { unsigned Reg = CSRegs[I]; - if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) { - MRI.setPhysRegUsed(SystemZ::R15D); + if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) { + SavedRegs.set(SystemZ::R15D); break; } } diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 60bad894ee44..5ade757f17f7 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -27,8 +27,8 @@ public: bool isFPCloseToIncomingSP() const override { return false; } const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 372f6fb3ea50..056ee02dcc21 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,10 +81,11 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { return Op; } -SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) - : TargetLowering(tm), Subtarget(STI) { - MVT PtrVT = getPointerTy(); + : TargetLowering(TM), Subtarget(STI) { + auto &DL = *TM.getDataLayout(); + MVT PtrVT = getPointerTy(DL); // Set up the register classes. 
if (Subtarget.hasHighWord()) @@ -455,7 +456,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, MaxStoresPerMemsetOptSize = 0; } -EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -507,8 +509,8 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } -bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. @@ -544,7 +546,7 @@ bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { //===----------------------------------------------------------------------===// TargetLowering::ConstraintType -SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { +SystemZTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'a': // Address register @@ -641,13 +643,14 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, // has already been verified. MC is the class associated with "t" and // Map maps 0-based register numbers to LLVM register numbers. static std::pair<unsigned, const TargetRegisterClass *> -parseRegisterNumber(const std::string &Constraint, - const TargetRegisterClass *RC, const unsigned *Map) { +parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, + const unsigned *Map) { assert(*(Constraint.end()-1) == '}' && "Missing '}'"); if (isdigit(Constraint[2])) { - std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); - unsigned Index = atoi(Suffix.c_str()); - if (Index < 16 && Map[Index]) + unsigned Index; + bool Failed = + Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); + if (!Failed && Index < 16 && Map[Index]) return std::make_pair(Map[Index], RC); } return std::make_pair(0U, nullptr); @@ -655,8 +658,7 @@ parseRegisterNumber(const std::string &Constraint, std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -687,7 +689,7 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &SystemZ::FP32BitRegClass); } } - if (Constraint[0] == '{') { + if (Constraint.size() > 0 && Constraint[0] == '{') { // We need to override the default register parsing for GPRs and FPRs // because the interpretation depends on VT. The internal names of // the registers are also different from the external names @@ -931,7 +933,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, // Create the SelectionDAG nodes corresponding to a load // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. 
- EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, @@ -969,7 +971,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); @@ -1019,7 +1021,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { @@ -2401,7 +2403,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); Reloc::Model RM = DAG.getTarget().getRelocationModel(); CodeModel::Model CM = DAG.getTarget().getCodeModel(); @@ -2440,7 +2442,7 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, unsigned Opcode, SDValue GOTOffset) const { SDLoc DL(Node); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDValue Glue; @@ -2486,7 +2488,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); // The high part of the thread pointer is in access register 0. @@ -2587,7 +2589,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDLoc DL(Node); const BlockAddress *BA = Node->getBlockAddress(); int64_t Offset = Node->getOffset(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -2597,7 +2599,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const { SDLoc DL(JT); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); // Use LARL to load the address of the table. 
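(Editorial sketch, not part of the commit.) The change repeated across the Sparc and SystemZ lowering hunks above and below is a single API migration: TargetLowering::getPointerTy() no longer reads the DataLayout held by the TargetMachine and instead takes it as an explicit argument, usually fetched from the SelectionDAG or MachineFunction at the call site. A minimal illustration of the new call-site shape, assuming it sits inside LLVM target-lowering code (the helper name below is hypothetical, not from the commit):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical helper, for illustration only: build a frame-index node whose
// type is the pointer type derived from the DAG's DataLayout (the new form),
// rather than from a layout held by the TargetMachine (the old form removed
// throughout this commit).
static SDValue makeFrameIndexValue(const TargetLowering &TLI,
                                   SelectionDAG &DAG, int FI) {
  EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // was: TLI.getPointerTy()
  return DAG.getFrameIndex(FI, PtrVT);
}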
@@ -2607,7 +2609,7 @@ SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { SDLoc DL(CP); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (CP->isMachineConstantPoolEntry()) @@ -2671,7 +2673,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo<SystemZMachineFunctionInfo>(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 2f7617bbdac3..949b67f114ea 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -339,10 +339,10 @@ public: const SystemZSubtarget &STI); // Override TargetLowering. - MVT getScalarShiftAmountTy(EVT LHSTy) const override { + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } - MVT getVectorIdxTy() const override { + MVT getVectorIdxTy(const DataLayout &DL) const override { // Only the lower 12 bits of an element index are used, so we don't // want to clobber the upper 32 bits of a GPR unnecessarily. return MVT::i32; @@ -364,12 +364,13 @@ public: return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } - EVT getSetCCResultType(LLVMContext &, EVT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -379,10 +380,9 @@ public: const char *getTargetNodeName(unsigned Opcode) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; TargetLowering::ConstraintType - getConstraintType(const std::string &Constraint) const override; + getConstraintType(StringRef Constraint) const override; TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override; @@ -391,8 +391,7 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode.size() == 1) { switch(ConstraintCode[0]) { default: diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 7cabea962e91..dc7bd25d7ed5 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -36,7 +36,7 @@ SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = 
MF.getSubtarget().getFrameLowering(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) { // R11D is the frame pointer. Reserve all aliases. @@ -64,7 +64,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineFunction &MF = *MBB.getParent(); auto *TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); DebugLoc DL = MI->getDebugLoc(); // Decompose the frame index into a base and offset. @@ -135,6 +135,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; } diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index e7e0268dbb8a..178aa3817311 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; #define DEBUG_TYPE "systemz-selectiondag-info" -SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { -} - // Decide whether it is best to use a loop or straight-line code for // a block operation of Size bytes with source address Src and destination // address Dest. Sequence is the opcode to use for straight-line code diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index a257d6b55494..246fa3e5e656 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -22,8 +22,7 @@ class SystemZTargetMachine; class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit SystemZSelectionDAGInfo(const DataLayout &DL); - ~SystemZSelectionDAGInfo(); + explicit SystemZSelectionDAGInfo() = default; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Src, diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index eb5e5c0b9ff8..0b49fcdd8f78 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -42,7 +42,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasTransactionalExecution(false), HasProcessorAssist(false), HasVector(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()), FrameLowering() {} + TSInfo(), FrameLowering() {} // Return true if GV binds locally under reloc model RM. 
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index e9cabe968eea..4b80973ed879 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -29,7 +29,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { public: explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. SystemZTTIImpl(const SystemZTTIImpl &Arg) @@ -37,18 +38,6 @@ public: SystemZTTIImpl(SystemZTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 0b05303f71bf..83174c20c8e9 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -150,8 +150,9 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const { } TargetIRAnalysis TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(getDataLayout()); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(F.getParent()->getDataLayout()); + }); } static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 87df7af84525..6a61fcdf0f86 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -19,7 +19,14 @@ using namespace llvm; //--------------------------------------------------------------------------- // TargetSubtargetInfo Class // -TargetSubtargetInfo::TargetSubtargetInfo() {} +TargetSubtargetInfo::TargetSubtargetInfo( + const Triple &TT, StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, + const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { +} TargetSubtargetInfo::~TargetSubtargetInfo() {} diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index df04c2a3460b..25de9eee0831 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_TARGET_DEFINITIONS WebAssembly.td) tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM WebAssemblyGenRegisterInfo.inc -gen-register-info) tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(WebAssemblyCommonTableGen) diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index d248556c62d7..224aa773a80e 100644 --- 
a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -29,6 +29,9 @@ using namespace llvm; #define GET_SUBTARGETINFO_MC_DESC #include "WebAssemblyGenSubtargetInfo.inc" +#define GET_REGINFO_MC_DESC +#include "WebAssemblyGenRegisterInfo.inc" + static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT) { MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 24893daec7ea..eebf5b72f62b 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -47,6 +47,9 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, // Defines symbolic names for WebAssembly registers. This defines a mapping from // register name to register number. // +#define GET_REGINFO_ENUM +#include "WebAssemblyGenRegisterInfo.inc" + #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile index 35d835c6506c..f102d73f6e86 100644 --- a/lib/Target/WebAssembly/Makefile +++ b/lib/Target/WebAssembly/Makefile @@ -12,7 +12,8 @@ LIBRARYNAME = LLVMWebAssemblyCodeGen TARGET = WebAssembly # Make sure that tblgen is run, first thing. -BUILT_SOURCES = WebAssemblyGenSubtargetInfo.inc WebAssemblyGenMCCodeEmitter.inc +BUILT_SOURCES = WebAssemblyGenRegisterInfo.inc WebAssemblyGenSubtargetInfo.inc \ + WebAssemblyGenMCCodeEmitter.inc DIRS = InstPrinter TargetInfo MCTargetDesc diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 7a71060a638f..63e02c455895 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -12,4 +12,15 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +Interesting work that remains to be done: +* Write a pass to restructurize irreducible control flow. This needs to be done + before register allocation to be efficient, because it may duplicate basic + blocks and WebAssembly performs register allocation at a whole-function + level. Note that LLVM's GPU code has such a pass, but it linearizes control + flow (e.g. both sides of branches execute and are masked) which is undesirable + for WebAssembly. +* Basic relooper to expose control flow as an AST. +* Figure out how to properly use MC for virtual ISAs. This may require some + refactoring of MC. + //===---------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4eec02efbd94..4184eb6dc5a6 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -38,6 +38,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // WebAssembly does not produce floating-point exceptions on normal floating // point operations. setHasFloatingPointExceptions(false); + // We don't know the microarchitecture here, so just reduce register pressure. 
+ setSchedulingPreference(Sched::RegPressure); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 35e88eec8573..64415658ed81 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly Atomic operand code-gen constructs. -// +/// +/// \file +/// \brief WebAssembly Atomic operand code-gen constructs. +/// //===----------------------------------------------------------------------===// // TODO: Implement atomic instructions. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td new file mode 100644 index 000000000000..6b5b6cd54173 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -0,0 +1,21 @@ +//===- WebAssemblyInstrCall.td-WebAssembly Call codegen support -*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Call operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +/* + * TODO(jfb): Add the following. + * + * call_direct: call function directly + * call_indirect: call function indirectly + * addressof: obtain a function pointer value for a given function + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td new file mode 100644 index 000000000000..3fa29061b1de --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -0,0 +1,44 @@ +//===-- WebAssemblyInstrConv.td-WebAssembly Conversion support -*- tablegen -*-= +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly datatype conversions, truncations, reinterpretations, +/// promotions, and demotions operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +/* + * TODO(jfb): Add the following. 
+ * + * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer + * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer + * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer + * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer + * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer + * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer + * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer + * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer + * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer + * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer + * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer + * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer + * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer + * float32.demote[float64]: demote a 64-bit float to a 32-bit float + * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float + * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float + * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float + * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float + * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float + * float64.promote[float32]: promote a 32-bit float to a 64-bit float + * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float + * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float + * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float + * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float + * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td new file mode 100644 index 000000000000..30ef6339d65a --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -0,0 +1,44 @@ +// WebAssemblyInstrFloat.td-WebAssembly Float codegen support ---*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Floating-point operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +defm FADD : BinaryFP<fadd>; +defm FSUB : BinaryFP<fsub>; +defm FMUL : BinaryFP<fmul>; +defm FDIV : BinaryFP<fdiv>; +defm FABS : UnaryFP<fabs>; +defm FNEG : UnaryFP<fneg>; +defm COPYSIGN : BinaryFP<fcopysign>; +defm CEIL : UnaryFP<fceil>; +defm FLOOR : UnaryFP<ffloor>; +defm TRUNC : UnaryFP<ftrunc>; +defm NEARESTINT : UnaryFP<fnearbyint>; + +/* + * TODO(jfb): Add the following for 32-bit and 64-bit. + * + * float32.eq: compare equal + * float32.lt: less than + * float32.le: less than or equal + * float32.gt: greater than + * float32.ge: greater than or equal + */ + +defm SQRT : UnaryFP<fsqrt>; + +/* + * TODO(jfb): Add the following for 32-bit and 64-bit. 
+ * + * float32.min: minimum (binary operator); if either operand is NaN, returns NaN + * float32.max: maximum (binary operator); if either operand is NaN, returns NaN + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 8bbf3e9ec87b..513c36fa2ec2 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly instruction format definitions. -// +/// +/// \file +/// \brief WebAssembly instruction format definitions. +/// //===----------------------------------------------------------------------===// // WebAssembly Instruction Format @@ -26,3 +27,29 @@ class I<dag oops, dag iops, list<dag> pattern, string cstr = ""> dag InOperandList = iops; let Pattern = pattern; } + +// Unary and binary instructions, for the local types that WebAssembly supports. +multiclass UnaryInt<SDNode node> { + def _I32 : I<(outs Int32:$dst), (ins Int32:$src), + [(set Int32:$dst, (node Int32:$src))]>; + def _I64 : I<(outs Int64:$dst), (ins Int64:$src), + [(set Int64:$dst, (node Int64:$src))]>; +} +multiclass BinaryInt<SDNode node> { + def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs), + [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>; + def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs), + [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>; +} +multiclass UnaryFP<SDNode node> { + def _F32 : I<(outs Float32:$dst), (ins Float32:$src), + [(set Float32:$dst, (node Float32:$src))]>; + def _F64 : I<(outs Float64:$dst), (ins Float64:$src), + [(set Float64:$dst, (node Float64:$src))]>; +} +multiclass BinaryFP<SDNode node> { + def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs), + [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>; + def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs), + [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>; +} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 142eccfbcaa5..fe3ca76dc08a 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly Instruction definitions. -// +/// +/// \file +/// \brief WebAssembly Instruction definitions. +/// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -32,6 +33,13 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, // WebAssembly-specific Operands. //===----------------------------------------------------------------------===// +/* + * TODO(jfb): Add the following. + * + * get_local: read the current value of a local variable + * set_local: set the current value of a local variable +*/ + //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. //===----------------------------------------------------------------------===// @@ -42,5 +50,10 @@ include "WebAssemblyInstrFormats.td" // Additional sets of instructions. 
//===----------------------------------------------------------------------===// +include "WebAssemblyInstrMemory.td" +include "WebAssemblyInstrCall.td" +include "WebAssemblyInstrInteger.td" +include "WebAssemblyInstrFloat.td" +include "WebAssemblyInstrConv.td" include "WebAssemblyInstrAtomics.td" include "WebAssemblyInstrSIMD.td" diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td new file mode 100644 index 000000000000..5f60fe81b1a2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -0,0 +1,45 @@ +// WebAssemblyInstrInteger.td-WebAssembly Integer codegen -------*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Integer operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +defm ADD : BinaryInt<add>; +defm SUB : BinaryInt<sub>; +defm MUL : BinaryInt<mul>; +defm SDIV : BinaryInt<sdiv>; +defm UDIV : BinaryInt<udiv>; +defm SREM : BinaryInt<srem>; +defm UREM : BinaryInt<urem>; +defm AND : BinaryInt<and>; +defm IOR : BinaryInt<or>; +defm XOR : BinaryInt<xor>; +defm SHL : BinaryInt<shl>; +defm SHR : BinaryInt<srl>; +defm SAR : BinaryInt<sra>; + +/* + * TODO(jfb): Add the following for 32-bit and 64-bit. + * + * int32.eq: signed-less compare equal + * int32.slt: signed less than + * int32.sle: signed less than or equal + * int32.ult: unsigned less than + * int32.ule: unsigned less than or equal + * int32.sgt: signed greater than + * int32.sge: signed greater than or equal + * int32.ugt: unsigned greater than + * int32.uge: unsigned greater than or equal + */ + +defm CLZ : UnaryInt<ctlz>; +defm CTZ : UnaryInt<cttz>; +defm POPCNT : UnaryInt<ctpop>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td new file mode 100644 index 000000000000..5ab40e826caa --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -0,0 +1,46 @@ +// WebAssemblyInstrMemory.td-WebAssembly Memory codegen support -*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Memory operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +/* + * TODO(jfb): Add the following. + * Each has optional alignment and immediate byte offset. 
+ * + * int32.load_sx[int8]: sign-extend to int32 + * int32.load_sx[int16]: sign-extend to int32 + * int32.load_zx[int8]: zero-extend to int32 + * int32.load_zx[int16]: zero-extend to int32 + * int32.load[int32]: (no conversion) + * int64.load_sx[int8]: sign-extend to int64 + * int64.load_sx[int16]: sign-extend to int64 + * int64.load_sx[int32]: sign-extend to int64 + * int64.load_zx[int8]: zero-extend to int64 + * int64.load_zx[int16]: zero-extend to int64 + * int64.load_zx[int32]: zero-extend to int64 + * int64.load[int64]: (no conversion) + * float32.load[float32]: (no conversion) + * float64.load[float64]: (no conversion) + * + * int32.store[int8]: wrap int32 to int8 + * int32.store[int16]: wrap int32 to int16 + * int32.store[int32]: (no conversion) + * int64.store[int8]: wrap int64 to int8 + * int64.store[int16]: wrap int64 to int16 + * int64.store[int32]: wrap int64 to int32 + * int64.store[int64]: (no conversion) + * float32.store[float32]: (no conversion) + * float64.store[float64]: (no conversion) + * + * load_global: load the value of a given global variable + * store_global: store a given value to a given global variable + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index e25483ad3f7a..3e29906219d2 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly SIMD operand code-gen constructs. -// +/// +/// \file +/// \brief WebAssembly SIMD operand code-gen constructs. +/// //===----------------------------------------------------------------------===// // TODO: Implement SIMD instructions. 
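A brief aside on the load_sx/load_zx distinction in the memory TODO list above: a sign-extending and a zero-extending load of the same byte produce different 32-bit results. The following is a minimal, self-contained C++ sketch for illustration only; it is not part of the patch.

#include <cstdint>
#include <cstdio>

int main() {
  // Pretend this byte was read from WebAssembly linear memory.
  uint8_t Byte = 0xFF;
  // int32.load_sx[int8]: treat the byte as signed, then widen.
  int32_t SX = static_cast<int32_t>(static_cast<int8_t>(Byte)); // -1 (two's complement)
  // int32.load_zx[int8]: treat the byte as unsigned, then widen.
  int32_t ZX = static_cast<int32_t>(Byte);                      // 255
  std::printf("sx=%d zx=%d\n", SX, ZX);
  return 0;
}

The int32.store[int8] entry in the same list is the inverse direction: only the low 8 bits of the 32-bit value are kept.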
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index ad24c90af6a2..385c40bf6693 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -30,4 +30,58 @@ using namespace llvm; #define DEBUG_TYPE "wasm-reg-info" -WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) : TT(TT) {} +#define GET_REGINFO_TARGET_DESC +#include "WebAssemblyGenRegisterInfo.inc" + +WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) + : WebAssemblyGenRegisterInfo(0), TT(TT) {} + +const MCPhysReg * +WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { + static const MCPhysReg CalleeSavedRegs[] = {0}; + return CalleeSavedRegs; +} + +BitVector +WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32, + WebAssembly::FP64}) + Reserved.set(Reg); + return Reserved; +} + +void WebAssemblyRegisterInfo::eliminateFrameIndex( + MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME +} + +unsigned +WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + static const unsigned Regs[2][2] = { + /* !isArch64Bit isArch64Bit */ + /* !hasFP */ {WebAssembly::SP32, WebAssembly::SP64}, + /* hasFP */ {WebAssembly::FP32, WebAssembly::FP64}}; + const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); + return Regs[TFI->hasFP(MF)][TT.isArch64Bit()]; +} + +bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const { + return !MF.getFunction()->hasFnAttribute("no-realign-stack"); +} + +// FIXME: share this with other backends with identical implementation? +bool WebAssemblyRegisterInfo::needsStackRealignment( + const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); + const Function *F = MF.getFunction(); + unsigned StackAlign = TFI->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h index 55300287a51e..dbdb9d0457af 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -16,6 +16,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H +#define GET_REGINFO_HEADER +#include "WebAssemblyGenRegisterInfo.inc" + namespace llvm { class MachineFunction; @@ -23,11 +26,25 @@ class RegScavenger; class TargetRegisterClass; class Triple; -class WebAssemblyRegisterInfo final { +class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo { const Triple &TT; public: explicit WebAssemblyRegisterInfo(const Triple &TT); + + // Code Generation virtual methods. 
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + // Debug information queries. + unsigned getFrameRegister(const MachineFunction &MF) const override; + + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 7b3d636a2605..2ba42eb94a40 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -6,10 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file describes the WebAssembly register classes and some nominal -// physical registers. -// +/// +/// \file +/// \brief This file describes the WebAssembly register classes and some nominal +/// physical registers. +/// //===----------------------------------------------------------------------===// class WebAssemblyReg<string n> : Register<n> { @@ -23,6 +24,31 @@ class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList> // Registers //===----------------------------------------------------------------------===// +// Special registers used as the frame and stack pointer. +// +// WebAssembly may someday supports mixed 32-bit and 64-bit heaps in the same +// application, which requires separate width FP and SP. +def FP32 : WebAssemblyReg<"%FP32">; +def FP64 : WebAssemblyReg<"%FP64">; +def SP32 : WebAssemblyReg<"%SP32">; +def SP64 : WebAssemblyReg<"%SP64">; + +// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do +// away with it? Try deleting once the backend works. +// WebAssembly uses virtual registers, but the backend defines a few physical +// registers here to keep SDAG and the MachineInstr layers happy. 
+foreach i = 0-4 in { + def I#i : WebAssemblyReg<"%i."#i>; // i32 + def L#i : WebAssemblyReg<"%l."#i>; // i64 + def F#i : WebAssemblyReg<"%f."#i>; // f32 + def D#i : WebAssemblyReg<"%d."#i>; // f64 +} + //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// + +def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>; +def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>; +def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; +def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>; diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp index cfd1bafff236..fae9c6100510 100644 --- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -17,7 +17,4 @@ using namespace llvm; #define DEBUG_TYPE "wasm-selectiondag-info" -WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {} diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h index 03e8d393558d..13d96671276d 100644 --- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -22,7 +22,6 @@ namespace llvm { class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo { public: - explicit WebAssemblySelectionDAGInfo(const DataLayout *DL); ~WebAssemblySelectionDAGInfo() override; }; diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index addea8e3cc36..3d9e7aacbfbf 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -42,7 +42,7 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const TargetMachine &TM) : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false), CPUString(CPU), TargetTriple(TT), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(FS)), - TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} + InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), + TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 08bd88c06985..7ffb6047b963 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -31,7 +31,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { typedef TargetTransformInfo TTI; friend BaseT; - const WebAssemblyTargetMachine *TM; const WebAssemblySubtarget *ST; const WebAssemblyTargetLowering *TLI; @@ -40,30 +39,15 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { public: WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F) - : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)), + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST), - TLI(Arg.TLI) {} + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)), - ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - WebAssemblyTTIImpl &operator=(const WebAssemblyTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - TM = RHS.TM; - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - WebAssemblyTTIImpl &operator=(WebAssemblyTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - TM = std::move(RHS.TM); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 3cad9fa1e2ae..91b144a44824 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -878,6 +878,29 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::EXTRQI: + if (MI->getOperand(2).isImm() && + MI->getOperand(3).isImm()) + DecodeEXTRQIMask(MI->getOperand(2).getImm(), + MI->getOperand(3).getImm(), + ShuffleMask); + + DestName = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + + case X86::INSERTQI: + if (MI->getOperand(3).isImm() && + MI->getOperand(4).isImm()) + DecodeINSERTQIMask(MI->getOperand(3).getImm(), + MI->getOperand(4).getImm(), + ShuffleMask); + + DestName = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; + case X86::PMOVZXBWrr: case X86::PMOVZXBDrr: case X86::PMOVZXBQrr: diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 3e0dc1424609..629802f5dc5e 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -220,7 +220,6 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) { case X86::PUSH32i8: return X86::PUSHi32; case X86::PUSH16i8: return X86::PUSHi16; case X86::PUSH64i8: return X86::PUSH64i32; - case X86::PUSH64i16: return X86::PUSH64i32; } } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index 89f394582631..ddb764facdbf 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -34,14 +34,16 @@ public: report_fatal_error(EC.message()); StringRef SymName = *SymNameOrErr; - uint64_t SymAddr; SymI->getAddress(SymAddr); + ErrorOr<uint64_t> SymAddr = SymI->getAddress(); + if (std::error_code EC = SymAddr.getError()) + report_fatal_error(EC.message()); uint64_t SymSize = SymI->getSize(); int64_t Addend = *ELFRelocationRef(Rel).getAddend(); MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. if (!Sym->isVariable()) - Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(*SymAddr, Ctx)); const MCExpr *Expr = nullptr; // If hasAddend is true, then we need to add Addend (r_addend) to Expr. 
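The X86ELFRelocationInfo.cpp hunk above switches getAddress() from an out-parameter to a returned ErrorOr<uint64_t>, so the error is checked before the address is used. A rough, self-contained sketch of that calling pattern follows; ErrorOrLike and lookupAddress are simplified stand-ins invented for this illustration, while the real code uses llvm::ErrorOr from Support/ErrorOr.h.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <system_error>

// Simplified stand-in for llvm::ErrorOr<T>: holds either an error code or a value.
template <typename T> struct ErrorOrLike {
  std::error_code EC;
  T Value;
  std::error_code getError() const { return EC; }
  const T &operator*() const { return Value; }
};

// Hypothetical symbol-address lookup that can fail.
ErrorOrLike<uint64_t> lookupAddress(bool Known) {
  if (!Known)
    return {std::make_error_code(std::errc::invalid_argument), 0};
  return {std::error_code(), 0x401000};
}

int main() {
  ErrorOrLike<uint64_t> SymAddr = lookupAddress(true);
  if (std::error_code EC = SymAddr.getError()) {  // mirrors the check added in the hunk
    std::fprintf(stderr, "fatal: %s\n", EC.message().c_str());
    return EXIT_FAILURE;
  }
  // Dereference only after the error check, as the patched code does with *SymAddr.
  std::printf("address: 0x%llx\n", (unsigned long long)*SymAddr);
  return 0;
}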
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 431010d4cbc2..83b4091d7665 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -88,9 +88,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT, if (CPUName.empty()) CPUName = "generic"; - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS); - return X; + return createX86MCSubtargetInfoImpl(TT, CPUName, ArchFS); } static MCInstrInfo *createX86MCInstrInfo() { @@ -99,17 +97,14 @@ static MCInstrInfo *createX86MCInstrInfo() { return X; } -static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { - Triple TheTriple(TT); - unsigned RA = (TheTriple.getArch() == Triple::x86_64) - ? X86::RIP // Should have dwarf #16. - : X86::EIP; // Should have dwarf #8. +static MCRegisterInfo *createX86MCRegisterInfo(const Triple &TT) { + unsigned RA = (TT.getArch() == Triple::x86_64) + ? X86::RIP // Should have dwarf #16. + : X86::EIP; // Should have dwarf #8. MCRegisterInfo *X = new MCRegisterInfo(); - InitX86MCRegisterInfo(X, RA, - X86_MC::getDwarfRegFlavour(TheTriple, false), - X86_MC::getDwarfRegFlavour(TheTriple, true), - RA); + InitX86MCRegisterInfo(X, RA, X86_MC::getDwarfRegFlavour(TT, false), + X86_MC::getDwarfRegFlavour(TT, true), RA); X86_MC::InitLLVM2SEHRegisterMapping(X); return X; } @@ -156,24 +151,23 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createX86MCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - Triple T(TT); - bool is64Bit = T.getArch() == Triple::x86_64; + bool is64Bit = TT.getArch() == Triple::x86_64; if (RM == Reloc::Default) { // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we // use static relocation model by default. - if (T.isOSDarwin()) { + if (TT.isOSDarwin()) { if (is64Bit) RM = Reloc::PIC_; else RM = Reloc::DynamicNoPIC; - } else if (T.isOSWindows() && is64Bit) + } else if (TT.isOSWindows() && is64Bit) RM = Reloc::PIC_; else RM = Reloc::Static; @@ -186,13 +180,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, if (RM == Reloc::DynamicNoPIC) { if (is64Bit) RM = Reloc::PIC_; - else if (!T.isOSDarwin()) + else if (!TT.isOSDarwin()) RM = Reloc::Static; } // If we are on Darwin, disallow static relocation model in X86-64 mode, since // the Mach-O file format doesn't support it. - if (RM == Reloc::Static && T.isOSDarwin() && is64Bit) + if (RM == Reloc::Static && TT.isOSDarwin() && is64Bit) RM = Reloc::PIC_; // For static codegen, if we're not already set, use Small codegen. 
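The createX86MCCodeGenInfo hunk above selects a default relocation model from the target triple when the caller passes Reloc::Default. A condensed, illustrative restatement of that decision is sketched below; the function name, enum, and boolean parameters are inventions for this sketch rather than LLVM API.

#include <cstdio>

enum class RelocModel { Static, PIC, DynamicNoPIC };

// Mirrors the logic shown above: Darwin prefers PIC in 64-bit mode and
// dynamic-no-pic in 32-bit mode; Win64 requires RIP-relative addressing, so it
// is forced to PIC; everything else defaults to static relocations.
RelocModel defaultX86RelocModel(bool IsOSDarwin, bool IsOSWindows, bool Is64Bit) {
  if (IsOSDarwin)
    return Is64Bit ? RelocModel::PIC : RelocModel::DynamicNoPIC;
  if (IsOSWindows && Is64Bit)
    return RelocModel::PIC;
  return RelocModel::Static;
}

int main() {
  bool Win64IsPIC = defaultX86RelocModel(false, true, true) == RelocModel::PIC;
  std::printf("Win64 default is PIC: %s\n", Win64IsPIC ? "yes" : "no");
  return 0;
}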
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index c9479b62f7b6..9bfe999424fa 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -34,7 +34,7 @@ public: if (std::error_code EC = SymNameOrErr.getError()) report_fatal_error(EC.message()); StringRef SymName = *SymNameOrErr; - uint64_t SymAddr; SymI->getAddress(SymAddr); + uint64_t SymAddr = SymI->getValue(); any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl()); bool isPCRel = Obj->getAnyRelocationPCRel(RE); @@ -90,8 +90,7 @@ public: const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx); symbol_iterator RSymI = Rel.getSymbol(); - uint64_t RSymAddr; - RSymI->getAddress(RSymAddr); + uint64_t RSymAddr = RSymI->getValue(); ErrorOr<StringRef> RSymName = RSymI->getName(); if (std::error_code EC = RSymName.getError()) report_fatal_error(EC.message()); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index ef3318ba7580..cae865a40819 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { - if (Imm & 0x88) - return; // Not a shuffle - unsigned HalfSize = VT.getVectorNumElements() / 2; for (unsigned l = 0; l != 2; ++l) { - unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize; + unsigned HalfMask = Imm >> (l * 4); + unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) - ShuffleMask.push_back(i); + ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i); } } @@ -431,4 +429,78 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) { for (unsigned i = 1; i < NumElts; i++) Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); } + +void DecodeEXTRQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask) { + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit extraction instruction as a shuffle if both the + // length and index work with whole bytes. + if (0 != (Len % 8) || 0 != (Idx % 8)) + return; + + // A length of zero is equivalent to a bit length of 64. + if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(16, SM_SentinelUndef); + return; + } + + // Convert index and index to work with bytes. + Len /= 8; + Idx /= 8; + + // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes + // of the lower 64-bits. The upper 64-bits are undefined. + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + Idx); + for (int i = Len; i != 8; ++i) + ShuffleMask.push_back(SM_SentinelZero); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + +void DecodeINSERTQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask) { + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit insertion instruction as a shuffle if both the + // length and index work with whole bytes. + if (0 != (Len % 8) || 0 != (Idx % 8)) + return; + + // A length of zero is equivalent to a bit length of 64. 
+ if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(16, SM_SentinelUndef); + return; + } + + // Convert index and index to work with bytes. + Len /= 8; + Idx /= 8; + + // INSERTQ: Extract lowest Len bytes from lower half of second source and + // insert over first source starting at Idx byte. The upper 64-bits are + // undefined. + for (int i = 0; i != Idx; ++i) + ShuffleMask.push_back(i); + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + 16); + for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + } // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 14b69434806e..3d10d18e860e 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -100,6 +100,14 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); /// \brief Decode a scalar float move instruction as a shuffle mask. void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. +void DecodeEXTRQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. +void DecodeINSERTQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask); } // llvm namespace #endif diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 02645460b6a2..b4319c8bb04f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -317,7 +317,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, } bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { - EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); + EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -608,7 +608,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // Prepare for inserting code in the local-value area. SavePoint SaveInsertPt = enterLocalValueArea(); - if (TLI.getPointerTy() == MVT::i64) { + if (TLI.getPointerTy(DL) == MVT::i64) { Opc = X86::MOV64rm; RC = &X86::GR64RegClass; @@ -690,13 +690,14 @@ redo_gep: case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; @@ -866,14 +867,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. 
- if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; } @@ -1000,7 +1001,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -1031,7 +1032,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT SrcVT = TLI.getValueType(RV->getType()); + EVT SrcVT = TLI.getValueType(DL, RV->getType()); EVT DstVT = VA.getValVT(); // Special handling for extended integers. if (SrcVT != DstVT) { @@ -1300,7 +1301,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { } bool X86FastISel::X86SelectZExt(const Instruction *I) { - EVT DstVT = TLI.getValueType(I->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (!TLI.isTypeLegal(DstVT)) return false; @@ -1309,7 +1310,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { return false; // Handle zero-extension from i1 to i8, which is common. - MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType()); + MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); if (SrcVT.SimpleTy == MVT::i1) { // Set the high bits to zero. ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); @@ -1362,7 +1363,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { X86::CondCode CC; if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { - EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType()); // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); @@ -1802,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; @@ -2004,7 +2005,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; } else { @@ -2166,8 +2167,8 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { } bool X86FastISel::X86SelectTrunc(const Instruction *I) { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); // This code only handles truncation to byte. if (DstVT != MVT::i8 && DstVT != MVT::i1) @@ -2416,7 +2417,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } case Intrinsic::stackprotector: { // Emit code to store the stack guard onto the stack. - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DL); const Value *Op1 = II->getArgOperand(0); // The guard's value. 
const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1)); @@ -2735,7 +2736,7 @@ bool X86FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { default: return false; @@ -2772,7 +2773,7 @@ bool X86FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned SrcReg; switch (VT.SimpleTy) { @@ -3108,7 +3109,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && + !GV->isStrongDefinitionForLinker() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, @@ -3240,8 +3241,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { return X86SelectSIToFP(I); case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (DstVT.bitsGT(SrcVT)) return X86SelectZExt(I); if (DstVT.bitsLT(SrcVT)) @@ -3384,7 +3385,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { addDirectMem(MIB, AddrReg); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - TM.getDataLayout()->getPointerSize(), Align); + DL.getPointerSize(), Align); MIB->addMemOperand(*FuncInfo.MF, MMO); return ResultReg; } @@ -3411,17 +3412,17 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (TM.getRelocationModel() == Reloc::Static && - TLI.getPointerTy() == MVT::i64) { + TLI.getPointerTy(DL) == MVT::i64) { // The displacement code could be more than 32 bits away so we need to use // an instruction with a 64 bit immediate BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), ResultReg) .addGlobalAddress(GV); } else { - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; + unsigned Opc = + TLI.getPointerTy(DL) == MVT::i32 + ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) + : X86::LEA64r; addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); } @@ -3431,7 +3432,7 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { } unsigned X86FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -3463,11 +3464,11 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) { X86AddressMode AM; if (!X86SelectAddress(C, AM)) return 0; - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? 
X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; - const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned Opc = + TLI.getPointerTy(DL) == MVT::i32 + ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) + : X86::LEA64r; + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 40b9c8a863a3..36a8cdbab55b 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -301,8 +301,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { bool FPIsUsed = false; static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!"); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0; i <= 6; ++i) - if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { + if (!MRI.reg_nodbg_empty(X86::FP0 + i)) { FPIsUsed = true; break; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 85c5b6499131..2a35c4cf31f3 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -90,7 +90,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || + MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn() || MFI->hasStackMap() || MFI->hasPatchPoint()); @@ -967,13 +967,26 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { - // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain. + // Stash value of base pointer. Saving RSP instead of EBP shortens + // dependence chain. Used by SjLj EH. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } + + if (X86FI->getHasSEHFramePtrSave()) { + // Stash the value of the frame pointer relative to the base pointer for + // Win32 EH. This supports Win32 EH, which does the inverse of the above: + // it recovers the frame pointer from the base pointer rather than the + // other way around. + unsigned Opm = Uses64BitFramePtr ? 
X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true, + getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex())) + .addReg(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } } if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { @@ -1412,9 +1425,11 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -void -X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); @@ -1436,7 +1451,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Spill the BasePtr if it's used. if (TRI->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(TRI->getBaseRegister()); + SavedRegs.set(TRI->getBaseRegister()); } static bool @@ -1667,8 +1682,6 @@ void X86FrameLowering::adjustForSegmentedStacks( .addImm(StackSize); BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) .addImm(X86FI->getArgumentStackSize()); - MF.getRegInfo().setPhysRegUsed(Reg10); - MF.getRegInfo().setPhysRegUsed(Reg11); } else { BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index c274c8820149..495cfcd1c3f7 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -68,8 +68,8 @@ public: void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; bool assignCalleeSavedSpillSlots(MachineFunction &MF, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6b23e62a2d35..d5351d25d6ed 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -246,8 +246,9 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) - ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, - TLI->getPointerTy()) + ? 
CurDAG->getTargetFrameIndex( + AM.Base_FrameIndex, + TLI->getPointerTy(CurDAG->getDataLayout())) : AM.Base_Reg; Scale = getI8Imm(AM.Scale, DL); Index = AM.IndexReg; @@ -581,11 +582,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { void X86DAGToDAGISel::EmitSpecialCodeForMain() { if (Subtarget->isTargetCygMing()) { TargetLowering::ArgListTy Args; + auto &DL = CurDAG->getDataLayout(); TargetLowering::CallLoweringInfo CLI(*CurDAG); CLI.setChain(CurDAG->getRoot()) .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), - CurDAG->getExternalSymbol("__main", TLI->getPointerTy()), + CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), std::move(Args), 0); const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); @@ -1025,7 +1027,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, switch (N.getOpcode()) { default: break; - case ISD::FRAME_ALLOC_RECOVER: { + case ISD::LOCAL_RECOVER: { if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) { // Use the symbol and don't prefix it. @@ -1638,7 +1640,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + auto &DL = MF->getDataLayout(); + return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); } /// Atomic opcode table diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b16bd18aefaa..6e22ab30057c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -76,7 +76,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, : TargetLowering(TM), Subtarget(&STI) { X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - TD = getDataLayout(); + TD = TM.getDataLayout(); // Set up the TargetLowering object. static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; @@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom); // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. 
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); @@ -825,6 +825,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FNEG, MVT::v2f64, Custom); setOperationAction(ISD::FABS, MVT::v2f64, Custom); + setOperationAction(ISD::SMAX, MVT::v8i16, Legal); + setOperationAction(ISD::UMAX, MVT::v16i8, Legal); + setOperationAction(ISD::SMIN, MVT::v8i16, Legal); + setOperationAction(ISD::UMIN, MVT::v16i8, Legal); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); setOperationAction(ISD::SETCC, MVT::v8i16, Custom); @@ -944,6 +949,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); } + setOperationAction(ISD::SMAX, MVT::v16i8, Legal); + setOperationAction(ISD::SMAX, MVT::v4i32, Legal); + setOperationAction(ISD::UMAX, MVT::v8i16, Legal); + setOperationAction(ISD::UMAX, MVT::v4i32, Legal); + setOperationAction(ISD::SMIN, MVT::v16i8, Legal); + setOperationAction(ISD::SMIN, MVT::v4i32, Legal); + setOperationAction(ISD::UMIN, MVT::v8i16, Legal); + setOperationAction(ISD::UMIN, MVT::v4i32, Legal); + // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -1018,6 +1032,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL, MVT::v2i64, Custom); setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i32, Custom); } @@ -1141,6 +1156,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHU, MVT::v16i16, Legal); setOperationAction(ISD::MULHS, MVT::v16i16, Legal); + setOperationAction(ISD::SMAX, MVT::v32i8, Legal); + setOperationAction(ISD::SMAX, MVT::v16i16, Legal); + setOperationAction(ISD::SMAX, MVT::v8i32, Legal); + setOperationAction(ISD::UMAX, MVT::v32i8, Legal); + setOperationAction(ISD::UMAX, MVT::v16i16, Legal); + setOperationAction(ISD::UMAX, MVT::v8i32, Legal); + setOperationAction(ISD::SMIN, MVT::v32i8, Legal); + setOperationAction(ISD::SMIN, MVT::v16i16, Legal); + setOperationAction(ISD::SMIN, MVT::v8i32, Legal); + setOperationAction(ISD::UMIN, MVT::v32i8, Legal); + setOperationAction(ISD::UMIN, MVT::v16i16, Legal); + setOperationAction(ISD::UMIN, MVT::v8i32, Legal); + // The custom lowering for UINT_TO_FP for v8i32 becomes interesting // when we have a 256bit-wide blend with immediate. setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); @@ -1184,6 +1212,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL, MVT::v4i64, Custom); setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i64, Custom); setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. 
@@ -1376,6 +1405,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8i1, Custom); + setOperationAction(ISD::SMAX, MVT::v16i32, Legal); + setOperationAction(ISD::SMAX, MVT::v8i64, Legal); + setOperationAction(ISD::UMAX, MVT::v16i32, Legal); + setOperationAction(ISD::UMAX, MVT::v8i64, Legal); + setOperationAction(ISD::SMIN, MVT::v16i32, Legal); + setOperationAction(ISD::SMIN, MVT::v8i64, Legal); + setOperationAction(ISD::UMIN, MVT::v16i32, Legal); + setOperationAction(ISD::UMIN, MVT::v8i64, Legal); + setOperationAction(ISD::ADD, MVT::v8i64, Legal); setOperationAction(ISD::ADD, MVT::v16i32, Legal); @@ -1473,6 +1511,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SUB, MVT::v32i16, Legal); setOperationAction(ISD::SUB, MVT::v64i8, Legal); setOperationAction(ISD::MUL, MVT::v32i16, Legal); + setOperationAction(ISD::MULHS, MVT::v32i16, Legal); + setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); @@ -1492,6 +1532,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); + setOperationAction(ISD::SMAX, MVT::v64i8, Legal); + setOperationAction(ISD::SMAX, MVT::v32i16, Legal); + setOperationAction(ISD::UMAX, MVT::v64i8, Legal); + setOperationAction(ISD::UMAX, MVT::v32i16, Legal); + setOperationAction(ISD::SMIN, MVT::v64i8, Legal); + setOperationAction(ISD::SMIN, MVT::v32i16, Legal); + setOperationAction(ISD::UMIN, MVT::v64i8, Legal); + setOperationAction(ISD::UMIN, MVT::v32i16, Legal); + for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { const MVT VT = (MVT::SimpleValueType)i; @@ -1531,6 +1580,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::XOR, MVT::v4i32, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i64, Custom); + + setOperationAction(ISD::SMAX, MVT::v2i64, Legal); + setOperationAction(ISD::SMAX, MVT::v4i64, Legal); + setOperationAction(ISD::UMAX, MVT::v2i64, Legal); + setOperationAction(ISD::UMAX, MVT::v4i64, Legal); + setOperationAction(ISD::SMIN, MVT::v2i64, Legal); + setOperationAction(ISD::SMIN, MVT::v4i64, Legal); + setOperationAction(ISD::UMIN, MVT::v2i64, Legal); + setOperationAction(ISD::UMIN, MVT::v4i64, Legal); } // We want to custom lower some of our intrinsics. @@ -1611,6 +1669,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BUILD_VECTOR); @@ -1652,7 +1711,8 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const { return TargetLoweringBase::getPreferredVectorAction(VT); } -EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return Subtarget->hasAVX512() ? MVT::i1: MVT::i8; @@ -1724,10 +1784,11 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { /// function arguments in the caller parameter area. 
For X86, aggregates /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. -unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { +unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { if (Subtarget->is64Bit()) { // Max of 8 and alignment of type. - unsigned TyAlign = TD->getABITypeAlignment(Ty); + unsigned TyAlign = DL.getABITypeAlignment(Ty); if (TyAlign > 8) return TyAlign; return 8; @@ -1840,7 +1901,8 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, if (!Subtarget->is64Bit()) // This doesn't have SDLoc associated with it, but is not really the // same as a Register. - return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()); + return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), + getPointerTy(DAG.getDataLayout())); return Table; } @@ -2032,7 +2094,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, // false, then an sret argument may be implicitly inserted in the SelDAG. In // either case FuncInfo->setSRetReturnReg() will have been called. if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { - SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy()); + SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, + getPointerTy(MF.getDataLayout())); unsigned RetValReg = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? @@ -2041,7 +2104,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. - RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy())); + RetOps.push_back( + DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } RetOps[0] = Chain; // Update chain. @@ -2288,11 +2352,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned Bytes = Flags.getByValSize(); if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); - return DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -2471,7 +2535,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (Ins[i].Flags.isSRet()) { unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - MVT PtrTy = getPointerTy(); + MVT PtrTy = getPointerTy(DAG.getDataLayout()); Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } @@ -2499,7 +2563,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MachineModuleInfo &MMI = MF.getMMI(); const Function *WinEHParent = nullptr; - if (IsWin64 && MMI.hasWinEHFuncInfo(Fn)) + if (MMI.hasWinEHFuncInfo(Fn)) WinEHParent = MMI.getWinEHParent(Fn); bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; bool IsWinEHParent = WinEHParent && WinEHParent == Fn; @@ -2561,11 +2625,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Store the integer parameter registers. 
SmallVector<SDValue, 8> MemOps; SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (SDValue Val : LiveGPRs) { - SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, - DAG.getIntPtrConstant(Offset, dl)); + SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + RSFIN, DAG.getIntPtrConstant(Offset, dl)); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo::getFixedStack( @@ -2592,7 +2656,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - } else if (IsWinEHOutlined) { + } else if (IsWin64 && IsWinEHOutlined) { // Get to the caller-allocated home save location. Add 8 to account // for the return address. int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2605,8 +2669,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Store the second integer parameter (rdx) into rsp+16 relative to the // stack pointer at the entry of the function. - SDValue RSFIN = - DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy(DAG.getDataLayout())); unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); Chain = DAG.getStore( @@ -2680,14 +2744,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); if (IsWinEHParent) { - int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); - SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); - MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; - SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); - Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, - MachinePointerInfo::getFixedStack(UnwindHelpFI), - /*isVolatile=*/true, - /*isNonTemporal=*/false, /*Alignment=*/0); + if (Is64Bit) { + int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); + MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; + SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); + Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, + MachinePointerInfo::getFixedStack(UnwindHelpFI), + /*isVolatile=*/true, + /*isNonTemporal=*/false, /*Alignment=*/0); + } else { + // Functions using Win32 EH are considered to have opaque SP adjustments + // to force local variables to be addressed from the frame or base + // pointers. + MFI->setHasOpaqueSPAdjustment(true); + } } return Chain; @@ -2701,7 +2772,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, PtrOff); if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); @@ -2718,7 +2790,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, bool IsTailCall, bool Is64Bit, int FPDiff, SDLoc dl) const { // Adjust the Return address stack slot. - EVT VT = getPointerTy(); + EVT VT = getPointerTy(DAG.getDataLayout()); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. 
@@ -2942,7 +3014,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(VA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } @@ -2955,8 +3027,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { - RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), - DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()))); + RegsToPass.push_back(std::make_pair( + unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), + getPointerTy(DAG.getDataLayout())))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of @@ -3036,16 +3109,16 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - FIN = DAG.getFrameIndex(FI, getPointerTy()); + FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); if (Flags.isByVal()) { // Copy relative to framepointer. SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, - RegInfo->getStackRegister(), - getPointerTy()); - Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); + StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + getPointerTy(DAG.getDataLayout())); + Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, Source); MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, ArgChain, @@ -3064,8 +3137,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, - getPointerTy(), RegInfo->getSlotSize(), - FPDiff, dl); + getPointerTy(DAG.getDataLayout()), + RegInfo->getSlotSize(), FPDiff, dl); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3106,7 +3179,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && + !GV->isStrongDefinitionForLinker() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, @@ -3123,17 +3196,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ExtraLoad = true; } - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), - G->getOffset(), OpFlags); + Callee = DAG.getTargetGlobalAddress( + GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags); // Add a wrapper if needed. if (WrapperKind != ISD::DELETED_NODE) - Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee); + Callee = DAG.getNode(X86ISD::WrapperRIP, dl, + getPointerTy(DAG.getDataLayout()), Callee); // Add extra indirection if needed. 
if (ExtraLoad) - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), - false, false, false, 0); + Callee = DAG.getLoad( + getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(), false, false, false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; @@ -3152,8 +3226,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), - OpFlags); + Callee = DAG.getTargetExternalSymbol( + S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags); } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI @@ -3184,9 +3258,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); + + // If this is an invoke in a 32-bit function using an MSVC personality, assume + // the function clobbers all registers. If an exception is thrown, the runtime + // will not restore CSRs. + // FIXME: Model this more precisely so that we can register allocate across + // the normal edge and spill and fill across the exceptional edge. + if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) { + const Function *CallerFn = MF.getFunction(); + EHPersonality Pers = + CallerFn->hasPersonalityFn() + ? classifyEHPersonality(CallerFn->getPersonalityFn()) + : EHPersonality::Unknown; + if (isMSVCEHPersonality(Pers)) + Mask = RegInfo->getNoPreservedMask(); + } + Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) @@ -3650,7 +3739,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); } bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, @@ -3881,6 +3970,15 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasLZCNT(); } +/// isUndefInRange - Return true if every element in Mask, beginning +/// from position Pos and ending in Pos+Size is undef. +static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) { + for (unsigned i = Pos, e = Pos + Size; i != e; ++i) + if (0 <= Mask[i]) + return false; + return true; +} + /// isUndefOrInRange - Return true if Val is undef or if its value falls within /// the specified range (L, H]. static bool isUndefOrInRange(int Val, int Low, int Hi) { @@ -4322,6 +4420,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// IsUnary to true if only uses one source. Note that this will set IsUnary for /// shuffles which use a single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. +/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. 
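// Illustrative sketch (not part of the diff): the behaviour of the new
// isUndefInRange helper above, restated over a plain array so it can be read
// in isolation. Negative shuffle-mask entries denote undef lanes.
static bool isUndefInRangeSketch(const int *Mask, unsigned Pos, unsigned Size) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
    if (Mask[i] >= 0)
      return false;
  return true;
}
// Example: for a v8i16 shuffle that only defines its low half,
//   int M[8] = {0, 1, 2, 3, -1, -1, -1, -1};
//   isUndefInRangeSketch(M, 4, 4) == true, so the upper half can be ignored
//   when matching the SSE4A EXTRQ/INSERTQ patterns introduced later in this patch.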
static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl<int> &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); @@ -4451,6 +4550,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); if (Mask.empty()) return false; + // Mask only contains negative index if an element is zero. + if (std::any_of(Mask.begin(), Mask.end(), + [](int M){ return M == SM_SentinelZero; })) + return false; break; case X86ISD::MOVSLDUP: DecodeMOVSLDUPMask(VT, Mask); @@ -4764,7 +4867,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, MVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); - MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType()); + MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); @@ -5082,7 +5185,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, assert(C && "Invalid constant type"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); + SDValue CP = + DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), @@ -6857,6 +6961,136 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, return SDValue(); } +/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. +static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef<int> Mask, + SelectionDAG &DAG) { + SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + assert(!Zeroable.all() && "Fully zeroable shuffle mask"); + + int Size = Mask.size(); + int HalfSize = Size / 2; + assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); + + // Upper half must be undefined. + if (!isUndefInRange(Mask, HalfSize, HalfSize)) + return SDValue(); + + // EXTRQ: Extract Len elements from lower half of source, starting at Idx. + // Remainder of lower half result is zero and upper half is all undef. + auto LowerAsEXTRQ = [&]() { + // Determine the extraction length from the part of the + // lower half that isn't zeroable. + int Len = HalfSize; + for (; Len >= 0; --Len) + if (!Zeroable[Len - 1]) + break; + assert(Len > 0 && "Zeroable shuffle mask"); + + // Attempt to match first Len sequential elements from the lower half. + SDValue Src; + int Idx = -1; + for (int i = 0; i != Len; ++i) { + int M = Mask[i]; + if (M < 0) + continue; + SDValue &V = (M < Size ? V1 : V2); + M = M % Size; + + // All mask elements must be in the lower half. 
+ if (M > HalfSize) + return SDValue(); + + if (Idx < 0 || (Src == V && Idx == (M - i))) { + Src = V; + Idx = M - i; + continue; + } + return SDValue(); + } + + if (Idx < 0) + return SDValue(); + + assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); + int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + }; + + if (SDValue ExtrQ = LowerAsEXTRQ()) + return ExtrQ; + + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source, starting at Idx. + // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } + auto LowerAsInsertQ = [&]() { + for (int Idx = 0; Idx != HalfSize; ++Idx) { + SDValue Base; + + // Attempt to match first source from mask before insertion point. + if (isUndefInRange(Mask, 0, Idx)) { + /* EMPTY */ + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + Base = V1; + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + Base = V2; + } else { + continue; + } + + // Extend the extraction length looking to match both the insertion of + // the second source and the remaining elements of the first. + for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { + SDValue Insert; + int Len = Hi - Idx; + + // Match insertion. + if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { + Insert = V1; + } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { + Insert = V2; + } else { + continue; + } + + // Match the remaining elements of the lower half. + if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { + /* EMPTY */ + } else if ((!Base || (Base == V1)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { + Base = V1; + } else if ((!Base || (Base == V2)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, + Size + Hi)) { + Base = V2; + } else { + continue; + } + + // We may not have a base (first source) - this can safely be undefined. + if (!Base) + Base = DAG.getUNDEF(VT); + + int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + } + } + + return SDValue(); + }; + + if (SDValue InsertQ = LowerAsInsertQ()) + return InsertQ; + + return SDValue(); +} + /// \brief Lower a vector shuffle as a zero or any extension. /// /// Given a specific number of elements, element bit width, and extension @@ -6864,7 +7098,7 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, /// features of the subtarget. static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Scale > 1 && "Need a scale to extend."); int NumElements = VT.getVectorNumElements(); int EltBits = VT.getScalarSizeInBits(); @@ -6901,6 +7135,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); } + // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes + // to 64-bits. 
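// Illustrative sketch (not part of the diff): how the EXTRQI/INSERTQI immediates
// above are formed. EXTRQ/INSERTQ describe a bit field inside the low 64 bits of
// the source by a 6-bit length and a 6-bit index, so the element counts are
// scaled by the element width and masked to 6 bits (the ISA documents a length
// of 0 as meaning a full 64 bits).
struct SSE4AFieldSketch { int BitLen, BitIdx; };
static SSE4AFieldSketch encodeSSE4AFieldSketch(int Len, int Idx, int EltBits) {
  // e.g. v8i16 (EltBits = 16), Len = 3, Idx = 1  ->  BitLen = 48, BitIdx = 16.
  return { (Len * EltBits) & 0x3f, (Idx * EltBits) & 0x3f };
}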
+ if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) { + assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); + assert(VT.getSizeInBits() == 128 && "Unexpected vector width!"); + + SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, + DAG.getConstant(EltBits, DL, MVT::i8), + DAG.getConstant(0, DL, MVT::i8))); + if (isUndefInRange(Mask, NumElements/2, NumElements/2)) + return DAG.getNode(ISD::BITCAST, DL, VT, Lo); + + SDValue Hi = + DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, + DAG.getConstant(EltBits, DL, MVT::i8), + DAG.getConstant(EltBits, DL, MVT::i8))); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); + } + // If this would require more than 2 unpack instructions to expand, use // pshufb when available. We can only use more than 2 unpack instructions // when zero extending i8 elements which also makes it easier to use pshufb. @@ -6991,7 +7247,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( return SDValue(); return lowerVectorShuffleAsSpecificZeroOrAnyExtend( - DL, VT, Scale, AnyExt, InputV, Subtarget, DAG); + DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. @@ -7166,9 +7422,9 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2 = DAG.getBitcast(MVT::v2i64, V2); V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v2i64, V2, - DAG.getConstant( - V2Index * EltVT.getSizeInBits()/8, DL, - DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64))); + DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, + DAG.getTargetLoweringInfo().getScalarShiftAmountTy( + DAG.getDataLayout(), VT))); V2 = DAG.getBitcast(VT, V2); } } @@ -8518,6 +8774,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG)) return Shift; + // See if we can use SSE4A Extraction / Insertion. + if (Subtarget->hasSSE4A()) + if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG)) + return V; + // There are special ways we can lower some single-element blends. if (NumV2Inputs == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, @@ -8670,6 +8931,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return ZExt; + // See if we can use SSE4A Extraction / Insertion. 
+ if (Subtarget->hasSSE4A()) + if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG)) + return V; + int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; }); @@ -10613,12 +10879,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MaskEltVT.getSizeInBits()); Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT, - getZeroVector(MaskVT, Subtarget, DAG, dl), - Idx, DAG.getConstant(0, dl, getPointerTy())); + getZeroVector(MaskVT, Subtarget, DAG, dl), Idx, + DAG.getConstant(0, dl, PtrVT)); SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), - Perm, DAG.getConstant(0, dl, getPointerTy())); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm, + DAG.getConstant(0, dl, PtrVT)); } return SDValue(); } @@ -11009,17 +11276,16 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { else if (Subtarget->isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; - SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), - CP->getAlignment(), - CP->getOffset(), OpFlag); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetConstantPool( + CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag); SDLoc DL(CP); - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) { - Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), - Result); + Result = + DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } return Result; @@ -11042,17 +11308,16 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { else if (Subtarget->isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), - OpFlag); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); SDLoc DL(JT); - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) - Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), - Result); + Result = + DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); return Result; } @@ -11080,24 +11345,24 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { OpFlag = X86II::MO_DARWIN_NONLAZY; } - SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag); SDLoc DL(Op); - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. 
if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) { - Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), - Result); + Result = + DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } // For symbols that require a load from a stub to get the address, emit the // load. if (isGlobalStubReference(OpFlag)) - Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result, + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(), false, false, false, 0); return Result; @@ -11112,20 +11377,19 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); SDLoc dl(Op); - SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset, - OpFlags); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) - Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), - Result); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } return Result; @@ -11139,40 +11403,40 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()); CodeModel::Model M = DAG.getTarget().getCodeModel(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); Offset = 0; } else { - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) - Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), - Result); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } // For globals that require a load from a stub to get the address, emit the // load. 
if (isGlobalStubReference(OpFlags)) - Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, + Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(), false, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. if (Offset != 0) - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result, - DAG.getConstant(Offset, dl, getPointerTy())); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, + DAG.getConstant(Offset, dl, PtrVT)); return Result; } @@ -11336,22 +11600,23 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Subtarget->isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: if (Subtarget->is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); + return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); case TLSModel::LocalDynamic: - return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(), + return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel( - GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), - DAG.getTarget().getRelocationModel() == Reloc::PIC_); + return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(), + DAG.getTarget().getRelocationModel() == + Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } @@ -11374,13 +11639,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag); - SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC32, the address is actually $g + Offset. if (PIC32) - Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Offset); // Lowering the machine isd will make sure everything is in the right @@ -11397,8 +11661,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), - Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); } if (Subtarget->isTargetKnownWindowsMSVC() || @@ -11426,50 +11689,50 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue TlsArray = - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58, dl) - : (Subtarget->isTargetWindowsGNU() - ? DAG.getIntPtrConstant(0x2C, dl) - : DAG.getExternalSymbol("_tls_array", getPointerTy())); + SDValue TlsArray = Subtarget->is64Bit() + ? DAG.getIntPtrConstant(0x58, dl) + : (Subtarget->isTargetWindowsGNU() + ? 
DAG.getIntPtrConstant(0x2C, dl) + : DAG.getExternalSymbol("_tls_array", PtrVT)); SDValue ThreadPointer = - DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, - MachinePointerInfo(Ptr), false, false, false, 0); + DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, + false, false, 0); SDValue res; if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { res = ThreadPointer; } else { // Load the _tls_index variable - SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy()); + SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); if (Subtarget->is64Bit()) - IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain, IDX, + IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, MachinePointerInfo(), MVT::i32, false, false, false, 0); else - IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(), - false, false, false, 0); + IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo(), false, + false, false, 0); - SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), dl, - getPointerTy()); - IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); + auto &DL = DAG.getDataLayout(); + SDValue Scale = + DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT); + IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); - res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX); + res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX); } - res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(), - false, false, false, 0); + res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo(), false, false, + false, 0); // Get the offset of start of .tls section SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), X86II::MO_SECREL); - SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA); + SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. - return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset); + return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset); } llvm_unreachable("TLS not implemented for this target."); @@ -11564,8 +11827,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -11614,7 +11878,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueType().getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + auto PtrVT = getPointerTy(MF.getDataLayout()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag @@ -11656,7 +11921,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Build some magic constants. 
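// Illustrative sketch (not part of the diff): the address computation the
// Windows TLS lowering above emits for the non local-exec case. The TEB slot
// (0x58 on x64, 0x2C on 32-bit windows-gnu, the _tls_array symbol otherwise)
// holds ThreadLocalStoragePointer, _tls_index selects this module's TLS block,
// and the SECREL relocation supplies the variable's offset inside .tls.
static char *windowsTLSAddressSketch(char **ThreadLocalStoragePointer, // loaded from the TEB slot
                                     unsigned TlsIndex,                // loaded from _tls_index
                                     unsigned long long SecRelOffset) { // offset of the GV inside .tls
  char *ModuleTLSBase = ThreadLocalStoragePointer[TlsIndex];
  return ModuleTLSBase + SecRelOffset;
}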
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); - SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector<Constant*,2> CV1; CV1.push_back( @@ -11666,7 +11932,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); - SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); + SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16); // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, @@ -11882,6 +12148,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG); @@ -11904,9 +12171,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); if (SrcVT == MVT::i32) { - SDValue WordOff = DAG.getConstant(4, dl, getPointerTy()); - SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, - getPointerTy(), StackSlot, WordOff); + SDValue WordOff = DAG.getConstant(4, dl, PtrVT); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo(), false, false, 0); @@ -11940,22 +12206,20 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, APInt FF(32, 0x5F800000ULL); // Check whether the sign bit is set. - SDValue SignSet = DAG.getSetCC(dl, - getSetCCResultType(*DAG.getContext(), MVT::i64), - Op.getOperand(0), - DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); + SDValue SignSet = DAG.getSetCC( + dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), + Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. SDValue FudgePtr = DAG.getConstantPool( - ConstantInt::get(*DAG.getContext(), FF.zext(64)), - getPointerTy()); + ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, Zero, Four); - FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? 
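// Illustrative sketch (not part of the diff): the correction the i64 uint-to-fp
// path above applies with the 0x5F800000 "fudge" constant, which is the IEEE-754
// single-precision encoding of 2^64. Shown as the scalar analogue (the lowering
// itself performs the add on the x87 stack at f80 precision):
static double u64ToDoubleSketch(unsigned long long X) {
  double D = (double)(long long)X;     // convert as signed, FILD-style
  if ((long long)X < 0)                // the sign bit was set, so we are off by 2^64
    D += 18446744073709551616.0;       // + 2^64, selected via the fudge constant
  return D;
}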
@@ -11974,6 +12238,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); EVT DstTy = Op.getValueType(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (!IsSigned && !isIntegerTypeFTOL(DstTy)) { assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); @@ -11998,7 +12263,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); unsigned Opc; if (!IsSigned && isIntegerTypeFTOL(DstTy)) @@ -12032,7 +12297,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); - StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + StackSlot = DAG.getFrameIndex(SSFI, PtrVT); } MachineMemOperand *MMO = @@ -12403,7 +12668,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantInt::get(*Context, MaskElt); C = ConstantVector::getSplat(NumElts, C); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy()); + SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -12462,7 +12727,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { CV[0] = ConstantFP::get(*Context, APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1))); Constant *C = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 16); @@ -12483,7 +12749,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1))); } C = ConstantVector::get(CV); - CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); + CPIdx = DAG.getConstantPool(C, PtrVT, 16); SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 16); @@ -13352,8 +13618,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (hasMinMax) { switch (SetCCOpcode) { default: break; - case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break; - case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break; + case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break; + case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break; } if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } @@ -14172,8 +14438,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, SmallVector<SDValue, 8> Chains; SDValue Ptr = Ld->getBasePtr(); - SDValue Increment = - DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, + TLI.getPointerTy(DAG.getDataLayout())); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); for (unsigned i = 0; i < NumLoads; ++i) { @@ -14613,7 +14879,7 @@ 
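// Illustrative sketch (not part of the diff): the bitwise identity that the
// constant-pool masks built in LowerFCOPYSIGN above implement, shown here for a
// scalar double.
#include <cstring>
static double copysignSketch(double Mag, double Sgn) {
  unsigned long long M, S;
  std::memcpy(&M, &Mag, 8);
  std::memcpy(&S, &Sgn, 8);
  unsigned long long SignBit = 1ULL << 63;
  unsigned long long R = (M & ~SignBit) | (S & SignBit); // keep magnitude, copy sign
  double Res;
  std::memcpy(&Res, &R, 8);
  return Res;
}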
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, EVT VT = Op.getNode()->getValueType(0); bool Is64Bit = Subtarget->is64Bit(); - EVT SPTy = getPointerTy(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -14630,8 +14896,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, "have nested arguments."); } - const TargetRegisterClass *AddrRegClass = - getRegClassFor(getPointerTy()); + const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, @@ -14666,6 +14931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); @@ -14674,8 +14940,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } @@ -14695,8 +14960,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MemOps.push_back(Store); // Store fp_offset - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(4, DL)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), @@ -14704,20 +14968,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MemOps.push_back(Store); // Store ptr to overflow_arg_area - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(4, DL)); - SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); + SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8), false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(8, DL)); - SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy()); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL)); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(SV, 16), false, false, 0); MemOps.push_back(Store); @@ -14739,7 +14999,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { EVT ArgVT = Op.getNode()->getValueType(0); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); + uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); uint8_t ArgMode; // Decide which area this value should be read from. 
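// Illustrative sketch (not part of the diff): the System V x86-64 va_list record
// that the four stores in LowerVASTART above populate; the byte offsets match
// the ADDs of 4, 4 and 8 in the code.
struct VAListSketch {
  unsigned gp_offset;        // offset 0:  FuncInfo->getVarArgsGPOffset()
  unsigned fp_offset;        // offset 4:  FuncInfo->getVarArgsFPOffset()
  void *overflow_arg_area;   // offset 8:  frame index of the stack argument area
  void *reg_save_area;       // offset 16: frame index of the register save area
};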
@@ -14768,7 +15028,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32), DAG.getConstant(ArgMode, dl, MVT::i8), DAG.getConstant(Align, dl, MVT::i32)}; - SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other); + SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), @@ -14995,6 +15255,20 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); } +static int getSEHRegistrationNodeSize(const Function *Fn) { + if (!Fn->hasPersonalityFn()) + report_fatal_error( + "querying registration node size for function without personality"); + // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See + // WinEHStatePass for the full struct definition. + switch (classifyEHPersonality(Fn->getPersonalityFn())) { + case EHPersonality::MSVC_X86SEH: return 24; + case EHPersonality::MSVC_CXX: return 16; + default: break; + } + report_fatal_error("can only recover FP for MSVC EH personality functions"); +} + /// When the 32-bit MSVC runtime transfers control to us, either to an outlined /// function or when returning to a parent frame after catching an exception, we /// recover the parent frame pointer by doing arithmetic on the incoming EBP. @@ -15009,7 +15283,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, SDLoc dl; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // It's possible that the parent function no longer has a personality function // if the exceptional code was optimized away, in which case we just return @@ -15017,15 +15291,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, if (!Fn->hasPersonalityFn()) return EntryEBP; - // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See - // WinEHStatePass for the full struct definition. - int RegNodeSize; - switch (classifyEHPersonality(Fn->getPersonalityFn())) { - default: - report_fatal_error("can only recover FP for MSVC EH personality functions"); - case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break; - case EHPersonality::MSVC_CXX: RegNodeSize = 16; break; - } + int RegNodeSize = getSEHRegistrationNodeSize(Fn); // Get an MCSymbol that will ultimately resolve to the frame offset of the EH // registration. 
@@ -15034,7 +15300,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, GlobalValue::getRealLinkageName(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); SDValue RegNodeFrameOffset = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal); + DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); // RegNodeBase = EntryEBP - RegNodeSize // ParentFP = RegNodeBase - RegNodeFrameOffset @@ -15059,6 +15325,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case INTR_TYPE_4OP: + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -15143,7 +15412,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Rnd; if (Op.getNumOperands() == 6) Rnd = Op.getOperand(5); - else + else Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Rnd), @@ -15173,7 +15442,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } - case VPERM_3OP_MASKZ: + case VPERM_3OP_MASKZ: case VPERM_3OP_MASK: case FMA_OP_MASK3: case FMA_OP_MASKZ: @@ -15499,6 +15768,19 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget "llvm.x86.seh.recoverfp must take a function as the first argument"); return recoverFramePointer(DAG, Fn, IncomingFPOp); } + + case Intrinsic::localaddress: { + // Returns one of the stack, base, or frame pointer registers, depending on + // which is used to reference local variables. + MachineFunction &MF = DAG.getMachineFunction(); + const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned Reg; + if (RegInfo->hasBasePointer(MF)) + Reg = RegInfo->getBaseRegister(); + else // This function handles the SP or FP case. + Reg = RegInfo->getPtrSizedFrameRegister(MF); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); + } } } @@ -15712,34 +15994,60 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); + const Function *Fn = MF.getFunction(); SDLoc dl(Op); SDValue Chain = Op.getOperand(0); + assert(Subtarget->getFrameLowering()->hasFP(MF) && + "using llvm.x86.seh.restoreframe requires a frame pointer"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); unsigned SPReg = RegInfo->getStackRegister(); + unsigned SlotSize = RegInfo->getSlotSize(); // Get incoming EBP. SDValue IncomingEBP = DAG.getCopyFromReg(Chain, dl, FrameReg, VT); - // Load [EBP-24] into SP. - SDValue SPAddr = - DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT)); + // SP is saved in the first field of every registration node, so load + // [EBP-RegNodeSize] into SP. 
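// Illustrative sketch (not part of the diff): the pointer arithmetic used by
// recoverFramePointer and LowerSEHRESTOREFRAME above, with RegNodeSize as
// returned by getSEHRegistrationNodeSize (24 bytes for the x86 SEH personality,
// 16 for the MSVC C++ personality).
static char *recoverParentFPSketch(char *EntryEBP, int RegNodeSize,
                                   int RegNodeFrameOffset) {
  char *RegNodeBase = EntryEBP - RegNodeSize;   // the EH registration node sits below EBP
  return RegNodeBase - RegNodeFrameOffset;      // ParentFP, per the comment above
}
static char *restoredSPSketch(char *EntryEBP, int RegNodeSize) {
  // The saved stack pointer is the first field of the registration node.
  return *(char **)(EntryEBP - RegNodeSize);
}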
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn); + SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, + DAG.getConstant(-RegNodeSize, dl, VT)); SDValue NewSP = DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits() / 8); Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); - // FIXME: Restore the base pointer in case of stack realignment! + if (!RegInfo->needsStackRealignment(MF)) { + // Adjust EBP to point back to the original frame position. + SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP); + Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); + } else { + assert(RegInfo->hasBasePointer(MF) && + "functions with Win32 EH must use frame or base pointer register"); + + // Reload the base pointer (ESI) with the adjusted incoming EBP. + SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP); + Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP); + + // Reload the spilled EBP value, now that the stack and base pointers are + // set up. + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + X86FI->setHasSEHFramePtrSave(true); + int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize); + X86FI->setSEHFramePtrSaveIndex(FI); + SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT), + MachinePointerInfo(), false, false, false, + VT.getScalarSizeInBits() / 8); + Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP); + } - // Adjust EBP to point back to the original frame position. - SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP); - Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); return Chain; } @@ -15910,7 +16218,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -15969,14 +16277,36 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned X86TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const MachineFunction &MF = DAG.getMachineFunction(); + unsigned Reg = StringSwitch<unsigned>(RegName) .Case("esp", X86::ESP) .Case("rsp", X86::RSP) + .Case("ebp", X86::EBP) + .Case("rbp", X86::RBP) .Default(0); + + if (Reg == X86::EBP || Reg == X86::RBP) { + if (!TFI.hasFP(MF)) + report_fatal_error("register " + StringRef(RegName) + + " is allocatable: function has no frame pointer"); +#ifndef NDEBUG + else { + const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned FrameReg = + RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); + assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && + "Invalid Frame Register!"); + } +#endif + } + if (Reg) return Reg; + report_fatal_error("Invalid register name global variable"); } @@ -15992,7 +16322,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); SDLoc dl (Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || @@ -16211,7 +16541,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, // Save FP Control Word to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDValue StackSlot = + DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout())); MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), @@ -16572,7 +16903,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) @@ -16642,9 +16973,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // If we have a signed multiply but no PMULDQ fix up the high parts of a // unsigned multiply. if (IsSigned && !Subtarget->hasSSE41()) { - SDValue ShAmt = - DAG.getConstant(31, dl, - DAG.getTargetLoweringInfo().getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getConstant( + 31, dl, + DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout())); SDValue T1 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1); SDValue T2 = DAG.getNode(ISD::AND, dl, VT, @@ -16717,6 +17048,38 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; + auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) { + assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type"); + MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); + SDValue Ex = DAG.getBitcast(ExVT, R); + + if (ShiftAmt >= 32) { + // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32. 
+ SDValue Upper = + getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG); + SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, + ShiftAmt - 32, DAG); + if (VT == MVT::v2i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3}); + if (VT == MVT::v4i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, + {9, 1, 11, 3, 13, 5, 15, 7}); + } else { + // SRA upper i32, SHL whole i64 and select lower i32. + SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, + ShiftAmt, DAG); + SDValue Lower = + getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG); + Lower = DAG.getBitcast(ExVT, Lower); + if (VT == MVT::v2i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3}); + if (VT == MVT::v4i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, + {8, 1, 10, 3, 12, 5, 14, 7}); + } + return DAG.getBitcast(VT, Ex); + }; + // Optimize shl/srl/sra with constant shift amount. if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) { if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { @@ -16725,6 +17088,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + // i64 SRA needs to be performed as partial shifts. + if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Op.getOpcode() == ISD::SRA) + return ArithmeticShiftRight64(ShiftAmt); + if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); @@ -16808,7 +17176,12 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (ShAmt != ShiftAmt) return SDValue(); } - return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + + if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + + if (Op.getOpcode() == ISD::SRA) + return ArithmeticShiftRight64(ShiftAmt); } return SDValue(); @@ -16890,7 +17263,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (Vals[j] != Amt.getOperand(i + j)) return SDValue(); } - return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); + + if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) + return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); } return SDValue(); } @@ -17042,6 +17417,53 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, } } + // v4i32 Non Uniform Shifts. + // If the shift amount is constant we can shift each lane using the SSE2 + // immediate shifts, else we need to zero-extend each lane to the lower i64 + // and shift using the SSE2 variable shifts. + // The separate results can then be blended together. + if (VT == MVT::v4i32) { + unsigned Opc = Op.getOpcode(); + SDValue Amt0, Amt1, Amt2, Amt3; + if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { + Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); + Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); + Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); + Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); + } else { + // ISD::SHL is handled above but we include it here for completeness. 
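// Illustrative sketch (not part of the diff): the split that
// ArithmeticShiftRight64 above performs, restated on a scalar. A 64-bit
// arithmetic shift is rebuilt from an arithmetic shift of the high 32 bits plus
// either a second 32-bit arithmetic shift (amounts >= 32) or a 64-bit logical
// shift (amounts < 32). Amt is assumed to be in [1, 63].
static long long sra64Sketch(long long V, unsigned Amt) {
  int Hi = (int)((unsigned long long)V >> 32);
  unsigned NewHi, NewLo;
  if (Amt >= 32) {
    NewHi = (unsigned)(Hi >> 31);          // splat of the sign bit (VSRAI by 31)
    NewLo = (unsigned)(Hi >> (Amt - 32));  // VSRAI of the old high half
  } else {
    NewHi = (unsigned)(Hi >> Amt);                     // VSRAI on the high lane
    NewLo = (unsigned)((unsigned long long)V >> Amt);  // low lane of VSRLI on the whole i64
  }
  return (long long)(((unsigned long long)NewHi << 32) | NewLo);
}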
+ switch (Opc) { + default: + llvm_unreachable("Unknown target vector shift node"); + case ISD::SHL: + Opc = X86ISD::VSHL; + break; + case ISD::SRL: + Opc = X86ISD::VSRL; + break; + case ISD::SRA: + Opc = X86ISD::VSRA; + break; + } + // The SSE2 shifts use the lower i64 as the same shift amount for + // all lanes and the upper i64 is ignored. These shuffle masks + // optimally zero-extend each lanes on SSE2/SSE41/AVX targets. + SDValue Z = getZeroVector(VT, Subtarget, DAG, dl); + Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); + Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); + Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); + Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); + } + + SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0); + SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1); + SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2); + SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3); + SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); + SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); + return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); + } + if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); @@ -17944,7 +18366,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, // the results are returned via SRet in memory. const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret"; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy()); + SDValue Callee = + DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = isF64 ? 
(Type*)StructType::get(ArgTy, ArgTy, nullptr) @@ -18443,10 +18866,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::UMAX: return "X86ISD::UMAX"; - case X86ISD::UMIN: return "X86ISD::UMIN"; - case X86ISD::SMAX: return "X86ISD::SMAX"; - case X86ISD::SMIN: return "X86ISD::SMIN"; case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; @@ -18456,6 +18875,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; + case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; @@ -18478,6 +18899,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD"; + case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -18594,16 +19016,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; + case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; + case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND"; + case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND"; } return nullptr; } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. -bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // X86 supports extremely general addressing modes. 
CodeModel::Model M = getTargetMachine().getCodeModel(); @@ -19555,7 +19980,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *AddrRegClass = - getRegClassFor(getPointerTy()); + getRegClassFor(getPointerTy(MF->getDataLayout())); unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), @@ -19750,7 +20175,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MemOpndSlot = CurOp; - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -19882,7 +20307,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -21377,7 +21802,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, // alignment is valid. unsigned Align = LN0->getAlignment(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( EltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT)) @@ -21513,14 +21938,15 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) { SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector); - EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(); + auto &DL = DAG.getDataLayout(); + EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL); SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(0, dl, VecIdxTy)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(1, dl, VecIdxTy)); - SDValue ShAmt = DAG.getConstant(32, dl, - DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64)); + SDValue ShAmt = DAG.getConstant( + 32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL)); Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf); Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt)); @@ -21539,10 +21965,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Replace each use (extract) with a load of the appropriate element. for (unsigned i = 0; i < 4; ++i) { uint64_t Offset = EltSize * i; - SDValue OffsetVal = DAG.getConstant(Offset, dl, TLI.getPointerTy()); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT); - SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), - StackPtr, OffsetVal); + SDValue ScalarAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal); // Load the scalar. Vals[i] = DAG.getLoad(ElementType, dl, Ch, @@ -21622,16 +22049,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, default: break; case ISD::SETULT: case ISD::SETULE: - Opc = hasUnsigned ? X86ISD::UMIN : 0u; break; + Opc = hasUnsigned ? ISD::UMIN : 0; break; case ISD::SETUGT: case ISD::SETUGE: - Opc = hasUnsigned ? X86ISD::UMAX : 0u; break; + Opc = hasUnsigned ? ISD::UMAX : 0; break; case ISD::SETLT: case ISD::SETLE: - Opc = hasSigned ? 
X86ISD::SMIN : 0u; break; + Opc = hasSigned ? ISD::SMIN : 0; break; case ISD::SETGT: case ISD::SETGE: - Opc = hasSigned ? X86ISD::SMAX : 0u; break; + Opc = hasSigned ? ISD::SMAX : 0; break; } // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && @@ -21640,16 +22067,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, default: break; case ISD::SETULT: case ISD::SETULE: - Opc = hasUnsigned ? X86ISD::UMAX : 0u; break; + Opc = hasUnsigned ? ISD::UMAX : 0; break; case ISD::SETUGT: case ISD::SETUGE: - Opc = hasUnsigned ? X86ISD::UMIN : 0u; break; + Opc = hasUnsigned ? ISD::UMIN : 0; break; case ISD::SETLT: case ISD::SETLE: - Opc = hasSigned ? X86ISD::SMAX : 0u; break; + Opc = hasSigned ? ISD::SMAX : 0; break; case ISD::SETGT: case ISD::SETGE: - Opc = hasSigned ? X86ISD::SMIN : 0u; break; + Opc = hasSigned ? ISD::SMIN : 0; break; } } @@ -22106,7 +22533,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Check if the selector will be produced by CMPP*/PCMP* Cond.getOpcode() == ISD::SETCC && // Check if SETCC has already been promoted - TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) { + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) == + CondVT) { bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode()); @@ -22826,7 +23254,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { // We shift all of the values by one. In many cases we do not have // hardware support for this operation. This is better expressed as an ADD // of two values. - if (N1SplatC->getZExtValue() == 1) + if (N1SplatC->getAPIntValue() == 1) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); } @@ -23478,7 +23906,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue Ptr = Ld->getBasePtr(); - SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy()); + SDValue Increment = + DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout())); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), NumElems/2); @@ -23687,7 +24116,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); - SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy()); + SDValue Stride = + DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride); @@ -23760,8 +24190,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl, - TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, dl, + TLI.getPointerTy(DAG.getDataLayout())); SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. 
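Context for the PerformSTORECombine hunk above: the wide store is split into two halves written at Ptr and Ptr + 16, and the 16-byte stride is now built with TLI.getPointerTy(DAG.getDataLayout()). A minimal scalar-memory model of the same splitting, purely illustrative (the helper name is made up):

    #include <cstdint>
    #include <cstring>

    // Split one 256-bit store into two 128-bit stores, the second at an offset
    // of 16 bytes, mirroring the Stride/Increment constants in the DAG combine.
    static void storeYmmAsTwoXmm(uint8_t *Dst, const uint8_t (&Src)[32]) {
      std::memcpy(Dst,      Src,      16); // low 128 bits
      std::memcpy(Dst + 16, Src + 16, 16); // high 128 bits, pointer advanced by 16
    }

    int main() {
      uint8_t Src[32], Dst[32];
      for (int i = 0; i < 32; ++i) Src[i] = static_cast<uint8_t>(i);
      storeYmmAsTwoXmm(Dst, Src);
      return std::memcmp(Src, Dst, 32) == 0 ? 0 : 1;
    }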
@@ -24659,6 +25089,31 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, return SDValue(); } +static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDValue Op0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT InVT = Op0.getValueType(); + EVT InSVT = InVT.getScalarType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) + // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) + if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) { + SDLoc dl(N); + EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + InVT.getVectorNumElements()); + SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); + + if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT)) + return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P); + + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); + } + + return SDValue(); +} + static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // First try to optimize away the conversion entirely when it's @@ -24913,6 +25368,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget); + case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: @@ -25135,7 +25591,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) || matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) { AsmPieces.clear(); - const std::string &ConstraintsStr = IA->getConstraintString(); + StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) @@ -25149,7 +25605,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) && matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) { AsmPieces.clear(); - const std::string &ConstraintsStr = IA->getConstraintString(); + StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) @@ -25176,7 +25632,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType -X86TargetLowering::getConstraintType(const std::string &Constraint) const { +X86TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'R': @@ -25508,7 +25964,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass *> X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. 
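The new PerformUINT_TO_FPCombine above rewrites UINT_TO_FP of vXi8/vXi16 as SINT_TO_FP of the zero-extended vXi32 value. The reason this is sound: after zero-extension to 32 bits the element is guaranteed non-negative, so signed and unsigned conversion agree. A small self-contained check (scalar model only, not LLVM code):

    #include <cstdint>
    #include <cassert>

    // Zero-extend to i32, then convert as signed: equals the unsigned conversion
    // of the original element because the widened value is never negative.
    static float uintToFloatViaSigned(uint16_t X) {
      int32_t Widened = static_cast<int32_t>(static_cast<uint32_t>(X)); // ZEXT
      return static_cast<float>(Widened);                               // SINT_TO_FP
    }

    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X) {
        float Direct = static_cast<float>(static_cast<uint16_t>(X)); // UINT_TO_FP
        assert(Direct == uintToFloatViaSigned(static_cast<uint16_t>(X)));
      }
      return 0;
    }

The diff still prefers a real UINT_TO_FP when the target makes it legal for the widened type; the SINT_TO_FP form is the fallback.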
@@ -25717,8 +26173,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return Res; } -int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty, +int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // An indexed folded instruction, i.e., inst (reg1, reg2, scale), @@ -25738,7 +26194,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. - if (isLegalAddressingMode(AM, Ty, AS)) + if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. return AM.Scale != 0; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 17660891635c..723d5304495c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -233,12 +233,6 @@ namespace llvm { /// Floating point horizontal sub. FHSUB, - /// Unsigned integer max and min. - UMAX, UMIN, - - /// Signed integer max and min. - SMAX, SMIN, - // Integer absolute value ABS, @@ -298,8 +292,8 @@ namespace llvm { // Vector FP round. VFPROUND, - // Vector signed integer to double. - CVTDQ2PD, + // Vector signed/unsigned integer to double. + CVTDQ2PD, CVTUDQ2PD, // 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -400,10 +394,15 @@ namespace llvm { VINSERT, VEXTRACT, + /// SSE4A Extraction and Insertion. + EXTRQI, INSERTQI, + // Vector multiply packed unsigned doubleword integers PMULUDQ, // Vector multiply packed signed doubleword integers PMULDQ, + // Vector Multiply Packed UnsignedIntegers with Round and Scale + MULHRS, // FMA nodes FMADD, @@ -429,6 +428,9 @@ namespace llvm { //with rounding mode SINT_TO_FP_RND, UINT_TO_FP_RND, + + // Vector float/double to signed/unsigned integer. + FP_TO_SINT_RND, FP_TO_UINT_RND, // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. VASTART_SAVE_XMM_REGS, @@ -599,7 +601,9 @@ namespace llvm { unsigned getJumpTableEncoding() const override; bool useSoftFloat() const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i8; + } const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, @@ -617,7 +621,8 @@ namespace llvm { /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at /// 4-byte boundaries. - unsigned getByValTypeAlignment(Type *Ty) const override; + unsigned getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const override; /// Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove @@ -685,7 +690,8 @@ namespace llvm { bool isCheapToSpeculateCtlz() const override; /// Return the value type to use for ISD::SETCC. - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; /// Determine which of the bits specified in Mask are known to be either /// zero or one and return them in the KnownZero/KnownOne bitsets. 
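Among the node additions above is MULHRS, backing vpmulhrsw. For reference, a scalar model of one lane following the documented pmulhrsw behaviour (widen to 32 bits, multiply, shift right by 14, add 1 to round, drop the rounding bit); this is an illustration, not the lowering code:

    #include <cstdint>

    // One lane of pmulhrsw; the instruction applies this to every 16-bit element.
    static int16_t mulhrs(int16_t A, int16_t B) {
      int32_t Product = static_cast<int32_t>(A) * static_cast<int32_t>(B);
      return static_cast<int16_t>(((Product >> 14) + 1) >> 1);
    }

    int main() {
      // 0.5 * 0.5 in Q15 fixed point rounds to 0.25.
      return mulhrs(16384, 16384) == 8192 ? 0 : 1;
    }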
@@ -707,8 +713,7 @@ namespace llvm { bool ExpandInlineAsm(CallInst *CI) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -726,8 +731,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "i") return InlineAsm::Constraint_i; else if (ConstraintCode == "o") @@ -745,13 +750,12 @@ namespace llvm { /// error, this returns a register number of 0. std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; /// Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -770,7 +774,7 @@ namespace llvm { /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty, + int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; bool isVectorShiftByScalarCheap(Type *Ty) const override; @@ -872,7 +876,8 @@ namespace llvm { return nullptr; // nothing to do, move along. } - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
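The header changes above move the inline-asm constraint hooks from const std::string& to StringRef, so callers can pass literals or slices of the constraint string without materialising a temporary std::string. A rough standalone sketch of the interface shape, using std::string_view as a stand-in for llvm::StringRef and a local enum as a stand-in for the InlineAsm constraint codes (only the "i" and "o" cases are taken from the diff; everything else here is illustrative):

    #include <string_view>

    enum class MemConstraint { Unknown, I, O };

    // Shape of a StringRef-style getInlineAsmMemConstraint.
    static MemConstraint classify(std::string_view Code) {
      if (Code == "i") return MemConstraint::I;
      if (Code == "o") return MemConstraint::O;
      return MemConstraint::Unknown;
    }

    int main() {
      return classify("o") == MemConstraint::O ? 0 : 1;
    }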
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index b309b8210851..faa91500b181 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3136,6 +3136,12 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P, + HasBWI, 1>; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SSE_INTALU_ITINS_P, HasBWI, 1>; @@ -3230,32 +3236,32 @@ let Predicates = [HasBWI] in { defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; } -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax, +defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax, +defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin, +defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, +defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; //===----------------------------------------------------------------------===// @@ -4035,7 +4041,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src1, - _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; } } @@ -4394,16 +4400,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : 
avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32, v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -4604,117 +4610,389 @@ def : Pat<(extloadf32 addr:$src), def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, Requires<[HasAVX512]>; -multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX; - def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc), - !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), - [], d>, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX; -} // hasSideEffects = 0 +//===----------------------------------------------------------------------===// +// AVX-512 Vector convert from signed/unsigned integer to float/double +// and from float/double to signed/unsigned integer +//===----------------------------------------------------------------------===// + +multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNode, + string Broadcast = _.BroadcastStr, + string Alias = ""> { + + defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src), OpcodeStr, "$src", "$src", + (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX; + + defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _Src.MemOp:$src), OpcodeStr#Alias, "$src", "$src", + (_.VT (OpNode (_Src.VT + (bitconvert (_Src.LdFrag addr:$src)))))>, EVEX; + + defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _Src.MemOp:$src), OpcodeStr, + "${src}"##Broadcast, "${src}"##Broadcast, + (_.VT (OpNode (_Src.VT + (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) + ))>, EVEX, EVEX_B; +} +// Coversion with SAE - suppress all exceptions +multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src), OpcodeStr, + "{sae}, $src", "$src, {sae}", + (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), + (i32 FROUND_NO_EXC)))>, + EVEX, EVEX_B; } -multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { 
-let hasSideEffects = 0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX; - let mayLoad = 1 in - def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX; -} // hasSideEffects = 0 +// Conversion with rounding control (RC) +multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, + "$rc, $src", "$src, $rc", + (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>, + EVEX, EVEX_B, EVEX_RC; } -defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround, - loadv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, PD, - EVEX_CD8<64, CD8VF>; +// Extend Float to Double +multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fextend>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, + X86vfpextRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info, + X86vfpext, "{1to2}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fextend>, + EVEX_V256; + } +} + +// Truncate Double to Float +multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fround>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, + X86vfproundRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, + X86vfpround, "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fround, + "{1to4}", "{y}">, EVEX_V256; + } +} + +defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">, + VEX_W, PD, EVEX_CD8<64, CD8VF>; +defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">, + PS, EVEX_CD8<32, CD8VH>; -defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - loadv4f64, f256mem, v8f64, v8f32, - SSEPackedDouble>, EVEX_V512, PS, - EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))), - (VCVTPD2PSZrr VR512:$src)>; +let Predicates = [HasVLX] in { + def : Pat<(v4f64 (extloadv4f32 addr:$src)), + (VCVTPS2PDZ256rm addr:$src)>; +} -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)), - (VCVTPD2PSZrrb VR512:$src, imm:$rc)>; +// Convert Signed/Unsigned Doubleword to Double +multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNode128> { + // No rounding in this op + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>, + EVEX_V512; -//===----------------------------------------------------------------------===// -// AVX-512 Vector convert from sign integer to float/double -//===----------------------------------------------------------------------===// + let 
Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, + OpNode128, "{1to2}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>, + EVEX_V256; + } +} -defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - loadv8i64, i512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, PS, - EVEX_CD8<32, CD8VF>; +// Convert Signed/Unsigned Doubleword to Float +multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, + OpNodeRnd>, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword +multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode, + "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, + "{1to4}", "{y}">, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword +multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. 
They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode, + "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, + "{1to4}", "{y}">, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Quardword +multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>, + EVEX_V256; + } +} -defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - loadv4i64, i256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, +// Convert Double to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Double +multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword +multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, + "{1to2}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, + "{1to2}">, EVEX_V128; + 
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Float +multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode, + "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, + "{1to4}", "{y}">, EVEX_V256; + } +} + +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS, EVEX_CD8<32, CD8VH>; -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - loadv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, XS, +defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, + X86VSintToFpRnd>, + PS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, + X86VFpToSintRnd>, + XS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, + X86VFpToSintRnd>, + PD, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, + X86VFpToUintRnd>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - loadv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PD, VEX_W, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, + X86VFpToUintRnd>, PS, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - loadv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, PS, +defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>, + XS, EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, + X86VUintToFpRnd>, XD, EVEX_CD8<32, CD8VF>; -// cvttps2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)), - (VCVTTPS2UDQZrr VR512:$src)>; +defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - loadv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PS, VEX_W, +defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int, + X86cvtpd2IntRnd>, XD, VEX_W, EVEX_CD8<64, CD8VF>; -// cvttpd2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), - (VCVTTPD2UDQZrr VR512:$src)>; +defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt, + X86cvtps2UIntRnd>, + PS, EVEX_CD8<32, CD8VF>; +defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PS, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - loadv4i64, f256mem, v8f64, v8i32, - 
SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; +defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int, + X86cvtpd2IntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - loadv16i32, f512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, XD, - EVEX_CD8<32, CD8VF>; +defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt, + X86cvtps2UIntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, + X86VFpToSlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, + X86VFpToSlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, + X86VFpToUlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, + X86VFpToUlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; +defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + +defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>; + +defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; + +let Predicates = [NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; @@ -4734,67 +5012,8 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; - -def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTDQ2PDZrr VR256X:$src)>; -def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTUDQ2PDZrr VR256X:$src)>; - -multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, PatFrag mem_frag, - X86MemOperand x86memop, Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [], d>, EVEX; - def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc), - !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), - [], d>, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [], d>, EVEX; -} // hasSideEffects = 0 } -defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - loadv16f32, f512mem, 
SSEPackedSingle>, PD, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, - loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, - EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2DQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2DQZrrb VR512:$src, imm:$rc)>; - -defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, - loadv16f32, f512mem, SSEPackedSingle>, - PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, - loadv8f64, f512mem, SSEPackedDouble>, VEX_W, - PS, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>; - let Predicates = [HasAVX512] in { def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), (VCVTPD2PSZrm addr:$src)>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 6ab961f04ecf..4cd5563ce727 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -105,14 +105,16 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in // jecxz. let Uses = [CX] in def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", [], IIC_JCXZ>, AdSize16; + "jcxz\t$dst", [], IIC_JCXZ>, AdSize16, + Requires<[Not64BitMode]>; let Uses = [ECX] in def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), "jecxz\t$dst", [], IIC_JCXZ>, AdSize32; let Uses = [RCX] in def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64; + "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64, + Requires<[In64BitMode]>; } // Indirect branches diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index fe245c3a7e38..1f61ffa84e9a 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -39,11 +39,6 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; -def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>; -def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>; -def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>; -def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>; - def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -75,6 +70,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisVT<1, v4i32>]>>; +def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD", + SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, + SDTCisVT<1, v4i32>]>>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -187,6 +185,7 @@ def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>; def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; +def X86mulhrs : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>; def X86avg : SDNode<"X86ISD::AVG" , 
SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; @@ -208,6 +207,14 @@ def X86pmuldq : SDNode<"X86ISD::PMULDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>>; +def X86extrqi : SDNode<"X86ISD::EXTRQI", + SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>; +def X86insertqi : SDNode<"X86ISD::INSERTQI", + SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisSameAs<1,2>, SDTCisVT<3, i8>, + SDTCisVT<4, i8>]>>; + // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get // translated into one of the target nodes below during lowering. // Note: this is a work in progress... @@ -357,8 +364,70 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; -def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; -def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; +def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; +def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>; + +def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; +def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>; + +def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCVecEltisVT<1, i32>, + SDTCisInt<2>]>; +def SDTVlongToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCVecEltisVT<1, i64>, + SDTCisInt<2>]>; + +def SDTVFPToIntRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<1>, SDTCVecEltisVT<0, i32>, + SDTCisInt<2>]>; +def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<1>, SDTCVecEltisVT<0, i64>, + SDTCisInt<2>]>; + +// Scalar +def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; +def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; + +// Vector with rounding mode + +// cvtt fp-to-int staff +def X86VFpToSintRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToIntRound>; +def X86VFpToUintRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToIntRound>; +def X86VFpToSlongRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToLongRound>; +def X86VFpToUlongRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToLongRound>; + +def X86VSintToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVintToFPRound>; +def X86VUintToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVintToFPRound>; +def X86VSlongToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVlongToFPRound>; +def X86VUlongToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVlongToFPRound>; + +// cvt fp-to-int staff +def X86cvtps2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToIntRnd>; +def X86cvtps2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToIntRnd>; +def X86cvtpd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToIntRnd>; +def X86cvtpd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToIntRnd>; + +// Vector without rounding mode +def X86cvtps2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToInt>; +def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>; +def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>; +def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>; + +def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", + 
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>, + SDTCisOpSmallerThanOp<1, 0>, + SDTCisInt<2>]>>; +def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCisInt<2>]>>; //===----------------------------------------------------------------------===// // SSE Complex Patterns diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index b92ba99fb100..786150760b93 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -269,14 +269,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::XOR8rr, X86::XOR8mr, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2Addr); i != e; ++i) { - unsigned RegOp = MemoryFoldTable2Addr[i].RegOp; - unsigned MemOp = MemoryFoldTable2Addr[i].MemOp; - unsigned Flags = MemoryFoldTable2Addr[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) { AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 0, folded load and store, no alignment requirement. - Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); + Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { @@ -424,12 +421,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable0); i != e; ++i) { - unsigned RegOp = MemoryFoldTable0[i].RegOp; - unsigned MemOp = MemoryFoldTable0[i].MemOp; - unsigned Flags = MemoryFoldTable0[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) { AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, - RegOp, MemOp, TB_INDEX_0 | Flags); + Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags); } static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { @@ -862,14 +856,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable1); i != e; ++i) { - unsigned RegOp = MemoryFoldTable1[i].RegOp; - unsigned MemOp = MemoryFoldTable1[i].MemOp; - unsigned Flags = MemoryFoldTable1[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) { AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 1, folded load - Flags | TB_INDEX_1 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { @@ -1116,6 +1107,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::ROUNDSDr, X86::ROUNDSDm, 0 }, + { X86::ROUNDSSr, X86::ROUNDSSm, 0 }, { X86::SBB32rr, X86::SBB32rm, 0 }, { X86::SBB64rr, X86::SBB64rm, 0 }, { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, @@ -1412,6 +1405,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VROUNDSDr, X86::VROUNDSDm, 0 }, + { X86::VROUNDSSr, X86::VROUNDSSm, 0 }, { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, @@ -1733,14 +1728,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::SHA256RNDS2rr, 
X86::SHA256RNDS2rm, TB_ALIGN_16 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2); i != e; ++i) { - unsigned RegOp = MemoryFoldTable2[i].RegOp; - unsigned MemOp = MemoryFoldTable2[i].MemOp; - unsigned Flags = MemoryFoldTable2[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) { AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 2, folded load - Flags | TB_INDEX_2 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD); } static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { @@ -1949,14 +1941,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable3); i != e; ++i) { - unsigned RegOp = MemoryFoldTable3[i].RegOp; - unsigned MemOp = MemoryFoldTable3[i].MemOp; - unsigned Flags = MemoryFoldTable3[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 3, folded load - Flags | TB_INDEX_3 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { @@ -2001,14 +1990,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable4); i != e; ++i) { - unsigned RegOp = MemoryFoldTable4[i].RegOp; - unsigned MemOp = MemoryFoldTable4[i].MemOp; - unsigned Flags = MemoryFoldTable4[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) { AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 4, folded load - Flags | TB_INDEX_4 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } } @@ -3820,7 +3806,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, X86::MOVPQIto64rr); if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. - return X86::MOVSDto64rr; + return X86::MMX_MOVD64from64rr; } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128XRegClass.contains(DestReg)) @@ -3828,7 +3814,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, X86::MOV64toPQIrr); // Copy from a GR64 register to a VR64 register. if (X86::VR64RegClass.contains(DestReg)) - return X86::MOV64toSDrr; + return X86::MMX_MOVD64to64rr; } // SrcReg(FR32) -> DestReg(GR32) @@ -6413,22 +6399,40 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { hasVirtualRegDefsInBasicBlock(*MI1, MBB) && MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) return true; - + return false; } +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (and, or) +static bool isAssociativeAndCommutative(unsigned Opcode) { + switch (Opcode) { + case X86::ADDSDrr: + case X86::ADDSSrr: + case X86::VADDSDrr: + case X86::VADDSSrr: + case X86::MULSDrr: + case X86::MULSSrr: + case X86::VMULSDrr: + case X86::VMULSSrr: + return true; + default: + return false; + } +} + /// Return true if the input instruction is part of a chain of dependent ops /// that are suitable for reassociation, otherwise return false. /// If the instruction's operands must be commuted to have a previous /// instruction of the same type define the first source operand, Commuted will /// be set to true. 
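The machine-combiner hunks above generalize reassociation from VADDSSrr to the small whitelist in isAssociativeAndCommutative (scalar SSE/AVX add and multiply). The point of the transform, Prev: B = A op X; Root: C = B op Y rewritten as NewB = X op Y; C = A op NewB, is to take one operation off the critical path through A. A minimal illustration for floating-point add; note that FP reassociation can change rounding, which is why LLVM gates it on fast-math style settings:

    // Original chain: two adds serialized behind A.
    static float chainForm(float A, float X, float Y) {
      float B = A + X;  // Prev: depends on A
      return B + Y;     // Root: depends on B
    }

    // Reassociated: X + Y is independent of A and can issue early.
    static float reassociatedForm(float A, float X, float Y) {
      float NewB = X + Y;
      return A + NewB;  // only one add remains on A's critical path
    }

    int main() {
      // Equal for these inputs; not guaranteed equal for all FP inputs.
      return chainForm(1.0f, 2.0f, 3.0f) == reassociatedForm(1.0f, 2.0f, 3.0f) ? 0 : 1;
    }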
-static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode, - bool &Commuted) { - // 1. The instruction must have the correct type. +static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { + // 1. The operation must be associative and commutative. // 2. The instruction must have virtual register definitions for its // operands in the same basic block. - // 3. The instruction must have a reassociatable sibling. - if (Inst.getOpcode() == AssocOpcode && + // 3. The instruction must have a reassociable sibling. + if (isAssociativeAndCommutative(Inst.getOpcode()) && hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && hasReassocSibling(Inst, Commuted)) return true; @@ -6455,14 +6459,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root, // B = A op X (Prev) // C = B op Y (Root) - // TODO: There are many more associative instruction types to match: - // 1. Other forms of scalar FP add (non-AVX) - // 2. Other data types (double, integer, vectors) - // 3. Other math / logic operations (mul, and, or) - unsigned AssocOpcode = X86::VADDSSrr; - - bool Commute = false; - if (isReassocCandidate(Root, AssocOpcode, Commute)) { + bool Commute; + if (isReassocCandidate(Root, Commute)) { // We found a sequence of instructions that may be suitable for a // reassociation of operands to increase ILP. Specify each commutation // possibility for the Prev instruction in the sequence and let the @@ -6512,7 +6510,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); MachineOperand &OpC = Root.getOperand(0); - + unsigned RegA = OpA.getReg(); unsigned RegB = OpB.getReg(); unsigned RegX = OpX.getReg(); @@ -6547,7 +6545,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)); InsInstrs.push_back(MIB1); - + MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) @@ -6579,7 +6577,7 @@ void X86InstrInfo::genAlternativeCodeSequence( Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); } assert(Prev && "Unknown pattern for machine combiner"); - + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); return; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6f38cb8eaf33..52bab9c79b45 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -194,7 +194,7 @@ def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void, def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; -def X86RecoverFrameAlloc : SDNode<"ISD::FRAME_ALLOC_RECOVER", +def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; @@ -1028,14 +1028,13 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm), - "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16, - Requires<[Not64BitMode]>; + "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; + def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; -def PUSHi16 
: Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16, - Requires<[Not64BitMode]>; def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; @@ -1081,9 +1080,6 @@ let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>; -def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16, - Requires<[In64BitMode]>; def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2a896dfe8aa8..a5ff9edf05a3 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4035,13 +4035,13 @@ defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, SSE_INTALU_ITINS_P, 0>; defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, SSE_INTALU_ITINS_P, 0>; -defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, +defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, SSE_INTALU_ITINS_P, 1>; -defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, +defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; -defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, +defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, SSE_INTALU_ITINS_P, 1>; -defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, +defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; // Intrinsic forms @@ -6834,29 +6834,28 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, } let Predicates = [HasAVX, NoVLX] in { - let isCommutable = 0 in - defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, + defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, + defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, + defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, + defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, + defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, + defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, + defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, + defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; defm VPMULDQ : 
SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32, @@ -6865,29 +6864,28 @@ let Predicates = [HasAVX, NoVLX] in { } let Predicates = [HasAVX2, NoVLX] in { - let isCommutable = 0 in - defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, + defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256, + defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256, + defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256, + defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256, + defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256, + defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256, + defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, + defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32, @@ -6896,22 +6894,21 @@ let Predicates = [HasAVX2, NoVLX] in { } let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in - defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, + defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, + defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, + defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, + defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, + defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, + defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, + defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, + defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32, VR128, 
memopv2i64, i128mem, @@ -7773,7 +7770,7 @@ let Constraints = "$src = $dst" in { def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), (ins VR128:$src, u8imm:$len, u8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", - [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, + [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, imm:$idx))]>, PD; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -7784,8 +7781,8 @@ def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", - [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, - VR128:$src2, imm:$len, imm:$idx))]>, XD; + [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, + imm:$len, imm:$idx))]>, XD; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "insertq\t{$mask, $src|$src, $mask}", diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 61a33484b8bf..2c8b95bcba22 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -19,7 +19,7 @@ namespace llvm { enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, - INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, + INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, @@ -213,18 +213,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0), 
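The hunks above retire the X86-private min/max SelectionDAG nodes in favour of the generic ISD::SMAX, ISD::SMIN, ISD::UMAX and ISD::UMIN opcodes, both in the TableGen patterns (X86smin becomes smin, and so on) and in the intrinsic lowering table. As a minimal sketch of the per-lane semantics those generic nodes model (an illustrative helper only, not LLVM code, and no claim about how the backend itself lowers them):

#include <algorithm>
#include <array>
#include <cstddef>

// Per-lane behaviour modelled by ISD::SMAX / ISD::UMAX and friends:
// an ordinary element-wise max whose signedness comes from the lane type T.
template <typename T, std::size_t N>
std::array<T, N> lanewiseMax(const std::array<T, N> &A,
                             const std::array<T, N> &B) {
  std::array<T, N> R{};
  for (std::size_t I = 0; I != N; ++I)
    R[I] = std::max(A[I], B[I]);
  return R;
}

// e.g. lanewiseMax<int16_t, 8> corresponds to pmaxsw and
//      lanewiseMax<uint8_t, 16> to pmaxub.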
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), @@ -596,60 +596,69 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - 
X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + 
X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_128, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_256, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_512, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulh_w_128, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulh_w_256, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulh_w_512, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_128, INTR_TYPE_2OP_MASK, ISD::MULHU, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_256, INTR_TYPE_2OP_MASK, ISD::MULHU, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_512, INTR_TYPE_2OP_MASK, ISD::MULHU, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0), @@ -1008,10 +1017,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0), X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), X86_INTRINSIC_DATA(sse2_pmulhu_w, 
INTR_TYPE_2OP, ISD::MULHU, 0), X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), @@ -1049,14 +1058,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0), X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0), X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0), + X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0), X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), @@ -1070,6 +1079,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0), + X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE), X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT), diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index d598b55aae3e..e6db9708b677 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -30,59 +30,67 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function /// contains stack pointer re-alignment code which requires FP. - bool ForceFramePointer; + bool ForceFramePointer = false; /// RestoreBasePointerOffset - Non-zero if the function has base pointer /// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a /// displacement from the frame pointer to a slot where the base pointer /// is stashed. - signed char RestoreBasePointerOffset; + signed char RestoreBasePointerOffset = 0; /// CalleeSavedFrameSize - Size of the callee-saved register portion of the /// stack frame in bytes. - unsigned CalleeSavedFrameSize; + unsigned CalleeSavedFrameSize = 0; /// BytesToPopOnReturn - Number of bytes function pops on return (in addition /// to the space used by the return address). 
/// Used on windows platform for stdcall & fastcall name decoration - unsigned BytesToPopOnReturn; + unsigned BytesToPopOnReturn = 0; /// ReturnAddrIndex - FrameIndex for return slot. - int ReturnAddrIndex; + int ReturnAddrIndex = 0; /// \brief FrameIndex for return slot. - int FrameAddrIndex; + int FrameAddrIndex = 0; /// TailCallReturnAddrDelta - The number of bytes by which return address /// stack slot is moved as the result of tail call optimization. - int TailCallReturnAddrDelta; + int TailCallReturnAddrDelta = 0; /// SRetReturnReg - Some subtargets require that sret lowering includes /// returning the value of the returned struct in a register. This field /// holds the virtual register into which the sret argument is passed. - unsigned SRetReturnReg; + unsigned SRetReturnReg = 0; /// GlobalBaseReg - keeps track of the virtual register initialized for /// use as the global base register. This is used for PIC in some PIC /// relocation models. - unsigned GlobalBaseReg; + unsigned GlobalBaseReg = 0; /// VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; + int VarArgsFrameIndex = 0; /// RegSaveFrameIndex - X86-64 vararg func register save area. - int RegSaveFrameIndex; + int RegSaveFrameIndex = 0; /// VarArgsGPOffset - X86-64 vararg func int reg offset. - unsigned VarArgsGPOffset; + unsigned VarArgsGPOffset = 0; /// VarArgsFPOffset - X86-64 vararg func fp reg offset. - unsigned VarArgsFPOffset; + unsigned VarArgsFPOffset = 0; /// ArgumentStackSize - The number of bytes on stack consumed by the arguments /// being passed on the stack. - unsigned ArgumentStackSize; + unsigned ArgumentStackSize = 0; /// NumLocalDynamics - Number of local-dynamic TLS accesses. - unsigned NumLocalDynamics; + unsigned NumLocalDynamics = 0; /// HasPushSequences - Keeps track of whether this function uses sequences /// of pushes to pass function parameters. - bool HasPushSequences; + bool HasPushSequences = false; + + /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill + /// slot for the frame pointer. + bool HasSEHFramePtrSave = false; + + /// The frame index of a stack object containing the original frame pointer + /// used to address arguments in a function using a base pointer. 
+ int SEHFramePtrSaveIndex = 0; private: /// ForwardedMustTailRegParms - A list of virtual and physical registers @@ -90,40 +98,9 @@ private: SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms; public: - X86MachineFunctionInfo() : ForceFramePointer(false), - RestoreBasePointerOffset(0), - CalleeSavedFrameSize(0), - BytesToPopOnReturn(0), - ReturnAddrIndex(0), - FrameAddrIndex(0), - TailCallReturnAddrDelta(0), - SRetReturnReg(0), - GlobalBaseReg(0), - VarArgsFrameIndex(0), - RegSaveFrameIndex(0), - VarArgsGPOffset(0), - VarArgsFPOffset(0), - ArgumentStackSize(0), - NumLocalDynamics(0), - HasPushSequences(false) {} - - explicit X86MachineFunctionInfo(MachineFunction &MF) - : ForceFramePointer(false), - RestoreBasePointerOffset(0), - CalleeSavedFrameSize(0), - BytesToPopOnReturn(0), - ReturnAddrIndex(0), - FrameAddrIndex(0), - TailCallReturnAddrDelta(0), - SRetReturnReg(0), - GlobalBaseReg(0), - VarArgsFrameIndex(0), - RegSaveFrameIndex(0), - VarArgsGPOffset(0), - VarArgsFPOffset(0), - ArgumentStackSize(0), - NumLocalDynamics(0), - HasPushSequences(false) {} + X86MachineFunctionInfo() = default; + + explicit X86MachineFunctionInfo(MachineFunction &MF) {}; bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -174,6 +151,12 @@ public: unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + bool getHasSEHFramePtrSave() const { return HasSEHFramePtrSave; } + void setHasSEHFramePtrSave(bool V) { HasSEHFramePtrSave = V; } + + int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; } + void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; } + SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() { return ForwardedMustTailRegParms; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 0033b5058187..d8495e53e0e3 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -202,7 +202,7 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { unsigned X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; switch (RC->getID()) { @@ -343,7 +343,7 @@ X86RegisterInfo::getNoPreservedMask() const { BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); @@ -452,7 +452,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // use both the SP and the FP, we need a separate base pointer register. 
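The X86MachineFunctionInfo.h hunks above move every field to a C++11 in-class default member initializer, which is what lets the two long hand-written constructor initializer lists collapse to = default and an empty body. A hedged, standalone illustration of that idiom (the struct and field names here are invented, not the real class):

// Illustrative only: with default member initializers, both constructors
// leave the object in the same zeroed state the old initializer lists produced.
struct MFIExample {
  bool ForceFP = false;
  unsigned SavedFrameSize = 0;
  int RetAddrIdx = 0;

  MFIExample() = default;                 // members already initialized above
  explicit MFIExample(int /*unused*/) {}  // no initializer list needed either
};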
bool CantUseFP = needsStackRealignment(MF); bool CantUseSP = - MFI->hasVarSizedObjects() || MFI->hasInlineAsmWithSPAdjust(); + MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment(); return CantUseFP && CantUseSP; } @@ -477,9 +477,9 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86FrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = - MF.getSubtarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -503,7 +503,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr; @@ -519,18 +519,17 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); - // FRAME_ALLOC uses a single offset, with no register. It only works in the + // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - if (Opc == TargetOpcode::FRAME_ALLOC) { + if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); int Offset; if (IsWinEH) - Offset = static_cast<const X86FrameLowering *>(TFI) - ->getFrameIndexOffsetFromSP(MF, FrameIndex); + Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex); else Offset = TFI->getFrameIndexOffset(MF, FrameIndex); FI.ChangeToImmediate(Offset); @@ -584,7 +583,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? FramePtr : StackPtr; } diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 5ca40bc0091b..ce79fcf9ad81 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -24,11 +24,6 @@ using namespace llvm; #define DEBUG_TYPE "x86-selectiondag-info" -X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -X86SelectionDAGInfo::~X86SelectionDAGInfo() {} - bool X86SelectionDAGInfo::isBaseRegConflictPossible( SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const { // We cannot use TRI->hasBasePointer() until *after* we select all basic @@ -37,7 +32,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( // dynamic stack adjustments (hopefully rare) and the base pointer would // conflict if we had to use it. 
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust()) + if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment()) return false; const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>( @@ -81,8 +76,9 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { - EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + EVT IntPtr = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index eb7e0ed9de6c..961bd8c8d5ef 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -29,8 +29,7 @@ class X86SelectionDAGInfo : public TargetSelectionDAGInfo { ArrayRef<unsigned> ClobberSet) const; public: - explicit X86SelectionDAGInfo(const DataLayout &DL); - ~X86SelectionDAGInfo(); + explicit X86SelectionDAGInfo() = default; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 3b25d30dc221..dff3624b7efe 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -68,7 +68,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportStorageClass()) return X86II::MO_DLLIMPORT; - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); // X86-64 in PIC mode. if (isPICStyleRIPRel()) { @@ -80,8 +80,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If symbol visibility is hidden, the extra load is not needed if // target is x86-64 or the symbol is definitely defined in the current // translation unit. - if (GV->hasDefaultVisibility() && - (isDecl || GV->isWeakForLinker())) + if (GV->hasDefaultVisibility() && !isDef) return X86II::MO_GOTPCREL; } else if (!isTargetWin64()) { assert(isTargetELF() && "Unknown rip-relative target"); @@ -107,7 +106,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If this is a strong reference to a definition, it is definitely not // through a stub. - if (!isDecl && !GV->isWeakForLinker()) + if (isDef) return X86II::MO_PIC_BASE_OFFSET; // Unless we have a symbol with hidden visibility, we have to go through a @@ -117,7 +116,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. - if (isDecl || GV->hasCommonLinkage()) { + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) { // Hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } @@ -131,7 +130,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If this is a strong reference to a definition, it is definitely not // through a stub. 
- if (!isDecl && !GV->isWeakForLinker()) + if (isDef) return X86II::MO_NO_FLAG; // Unless we have a symbol with hidden visibility, we have to go through a @@ -193,12 +192,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit,+sse2"; } - // If feature string is not empty, parse features string. + // Parse features string and set the CPU. ParseSubtargetFeatures(CPUName, FullFS); - // Make sure the right MCSchedModel is used. - InitCPUSchedModel(CPUName); - InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with @@ -298,9 +294,8 @@ X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU, TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), - TSInfo(*TM.getDataLayout()), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this, getStackAlignment()) { + TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. if (TM.getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d420abbe1433..f026d4295f71 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -447,8 +447,26 @@ public: } bool isCallingConvWin64(CallingConv::ID CC) const { - return (isTargetWin64() && CC != CallingConv::X86_64_SysV) || - CC == CallingConv::X86_64_Win64; + switch (CC) { + // On Win64, all these conventions just use the default convention. + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::X86_FastCall: + case CallingConv::X86_StdCall: + case CallingConv::X86_ThisCall: + case CallingConv::X86_VectorCall: + case CallingConv::Intel_OCL_BI: + return isTargetWin64(); + // This convention allows using the Win64 convention on other targets. + case CallingConv::X86_64_Win64: + return true; + // This convention allows using the SysV convention on Windows targets. + case CallingConv::X86_64_SysV: + return false; + // Otherwise, who knows what this is. + default: + return false; + } } /// ClassifyGlobalReference - Classify a global variable reference for the diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 0c82a700952b..7df726091843 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -89,7 +89,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -117,6 +117,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry<MVT::SimpleValueType> AVX2UniformConstCostTable[] = { + { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. + { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence @@ -211,6 +213,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. 
{ ISD::SRA, MVT::v8i16, 1 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence @@ -261,12 +264,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized. + { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized. + { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized. // It is not a good idea to vectorize division. We have to scalarize it and @@ -352,7 +355,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); unsigned Cost = 1; if (LT.second.getSizeInBits() > 128) Cost = 3; // Extract + insert + copy. @@ -364,7 +367,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, if (Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); // The backend knows how to generate a single VEX.256 version of // instruction VPBLENDW if the target supports AVX2. @@ -464,8 +467,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); - std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); + std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src); + std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst); static const TypeConversionCostTblEntry<MVT::SimpleValueType> SSE2ConvTbl[] = { @@ -537,8 +540,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Idx != -1) return AVX512ConversionTbl[Idx].Cost; } - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); // The function getSimpleVT only handles simple value types. if (!SrcTy.isSimple() || !DstTy.isSimple()) @@ -667,7 +670,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -740,7 +743,7 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (Index != -1U) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. 
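The cost-table updates above (for instance the new ISD::SRA on MVT::v2i64 entry priced as two psrad plus a shuffle, and the v4i32 shifts re-priced as shift-each-lane-plus-blend rather than scalarized) all feed the same mechanism: legalize the type, then look the (opcode, legal MVT) pair up in a small static table, falling back to a generic estimate on a miss. A minimal sketch of that lookup shape, with invented names standing in for the LLVM types:

// Illustrative stand-ins for the (opcode, value type) -> cost tables above.
enum class OpKind { SRA, SRL, SDIV };
enum class SimpleVT { v8i16, v4i32, v2i64 };

struct CostEntry { OpKind Op; SimpleVT VT; unsigned Cost; };

static const CostEntry SSE2CostTable[] = {
    {OpKind::SRA, SimpleVT::v4i32, 1},  // psrad
    {OpKind::SRA, SimpleVT::v2i64, 4},  // 2 x psrad + shuffle
};

unsigned lookupCost(OpKind Op, SimpleVT VT, unsigned GenericCost) {
  for (const CostEntry &E : SSE2CostTable)
    if (E.Op == Op && E.VT == VT)
      return E.Cost;
  return GenericCost;  // no table entry: use the generic estimate
}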
if (!LT.second.isVector()) @@ -803,7 +806,7 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); @@ -850,9 +853,9 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, } // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy); unsigned Cost = 0; - if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() && + if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) + @@ -887,7 +890,7 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, bool IsPairwise) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -1117,11 +1120,11 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) { int DataWidth = DataTy->getPrimitiveSizeInBits(); - + // Todo: AVX512 allows gather/scatter, works with strided and random as well if ((DataWidth < 32) || (Consecutive == 0)) return false; - if (ST->hasAVX512() || ST->hasAVX2()) + if (ST->hasAVX512() || ST->hasAVX2()) return true; return false; } diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index a83158440193..da3f36c2e27e 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -40,7 +40,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { public: explicit X86TTIImpl(const X86TargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
X86TTIImpl(const X86TTIImpl &Arg) @@ -48,18 +49,6 @@ public: X86TTIImpl(X86TTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - X86TTIImpl &operator=(const X86TTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - X86TTIImpl &operator=(X86TTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 90357257b9ef..9190d0be9e4d 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -113,8 +113,8 @@ char WinEHStatePass::ID = 0; bool WinEHStatePass::doInitialization(Module &M) { TheModule = &M; - FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape); - FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover); + FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape); + FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::localrecover); FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress); return false; } @@ -133,7 +133,7 @@ bool WinEHStatePass::doFinalization(Module &M) { void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { // This pass should only insert a stack allocation, memory accesses, and - // framerecovers. + // localrecovers. AU.setPreservesCFG(); } @@ -336,9 +336,11 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { FunctionType *TargetFuncTy = FunctionType::get(Int32Ty, makeArrayRef(&ArgTys[0], 5), /*isVarArg=*/false); - Function *Trampoline = Function::Create( - TrampolineTy, GlobalValue::InternalLinkage, - Twine("__ehhandler$") + ParentFunc->getName(), TheModule); + Function *Trampoline = + Function::Create(TrampolineTy, GlobalValue::InternalLinkage, + Twine("__ehhandler$") + GlobalValue::getRealLinkageName( + ParentFunc->getName()), + TheModule); BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); IRBuilder<> Builder(EntryBB); Value *LSDA = emitEHLSDA(Builder, ParentFunc); @@ -419,14 +421,14 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { } /// Escape RegNode so that we can access it from child handlers. Find the call -/// to frameescape, if any, in the entry block and append RegNode to the list +/// to localescape, if any, in the entry block and append RegNode to the list /// of arguments. int WinEHStatePass::escapeRegNode(Function &F) { - // Find the call to frameescape and extract its arguments. + // Find the call to localescape and extract its arguments. IntrinsicInst *EscapeCall = nullptr; for (Instruction &I : F.getEntryBlock()) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + if (II && II->getIntrinsicID() == Intrinsic::localescape) { EscapeCall = II; break; } @@ -440,8 +442,10 @@ int WinEHStatePass::escapeRegNode(Function &F) { // Replace the call (if it exists) with new one. Otherwise, insert at the end // of the entry block. - IRBuilder<> Builder(&F.getEntryBlock(), - EscapeCall ? 
EscapeCall : F.getEntryBlock().end()); + Instruction *InsertPt = EscapeCall; + if (!EscapeCall) + InsertPt = F.getEntryBlock().getTerminator(); + IRBuilder<> Builder(&F.getEntryBlock(), InsertPt); Builder.CreateCall(FrameEscape, Args); if (EscapeCall) EscapeCall->eraseFromParent(); @@ -520,6 +524,11 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) { for (auto &Handler : ActionList) { if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) { auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc()); +#ifndef NDEBUG + for (BasicBlock *Pred : predecessors(BA->getBasicBlock())) + assert(Pred->isLandingPad() && + "WinEHPrepare failed to split block"); +#endif ExceptBlocks.insert(BA->getBasicBlock()); } } diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index ac954d0a8fa4..b4085835f285 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -40,7 +40,7 @@ static MCInstrInfo *createXCoreMCInstrInfo() { return X; } -static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createXCoreMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitXCoreMCRegisterInfo(X, XCore::LR); return X; @@ -48,9 +48,7 @@ static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitXCoreMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createXCoreMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, @@ -64,7 +62,8 @@ static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createXCoreMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index bd834cc5be4b..76c3d8130e75 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -525,12 +525,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void XCoreFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void XCoreFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); - bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + bool LRUsed = MRI.isPhysRegModified(XCore::LR); if (!LRUsed && !MF.getFunction()->isVarArg() && MF.getFrameInfo()->estimateStackSize(MF)) @@ -550,7 +553,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (LRUsed) { // We will handle the LR in the prologue/epilogue // and allocate space on the stack ourselves. 
- MF.getRegInfo().setPhysRegUnused(XCore::LR); + SavedRegs.reset(XCore::LR); XFI->createLRSpillSlot(MF); } diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index 607c77248952..69c71adc8d3f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -47,8 +47,8 @@ namespace llvm { bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index f5b180b1ac0d..9d4a966dfba4 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -144,10 +144,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { MVT::i32, MskSize); } else if (!isUInt<16>(Val)) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - getTargetLowering()->getPointerTy()); + SDValue CPIdx = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())); SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, MVT::Other, CPIdx, CurDAG->getEntryNode()); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index aa71241102ff..d62e7428299d 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -281,7 +281,8 @@ static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) if (!ObjType->isSized()) return false; - unsigned ObjSize = XTL.getDataLayout()->getTypeAllocSize(ObjType); + auto &DL = GV->getParent()->getDataLayout(); + unsigned ObjSize = DL.getTypeAllocSize(ObjType); return ObjSize < CodeModelLargeSize && ObjSize != 0; } @@ -312,8 +313,9 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const Constant *GAI = ConstantExpr::getGetElementPtr( Type::getInt8Ty(*DAG.getContext()), GA, Idx); SDValue CP = DAG.getConstantPool(GAI, MVT::i32); - return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, - MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, + DAG.getEntryNode(), CP, MachinePointerInfo(), false, + false, false, 0); } } @@ -321,11 +323,11 @@ SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - + auto PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT); - return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); + return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, PtrVT, Result); } SDValue XCoreTargetLowering:: @@ -378,9 +380,10 @@ SDValue XCoreTargetLowering:: lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, int64_t Offset, SelectionDAG &DAG) const { + auto PtrVT = getPointerTy(DAG.getDataLayout()); if ((Offset & 0x3) == 0) { - return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(), - false, false, false, 0); + return DAG.getLoad(PtrVT, DL, Chain, Base, 
MachinePointerInfo(), false, + false, false, 0); } // Lower to pair of consecutive word aligned loads plus some bit shifting. int32_t HighOffset = RoundUpToAlignment(Offset, 4); @@ -401,11 +404,9 @@ lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, DL, MVT::i32); SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, DL, MVT::i32); - SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, - LowAddr, MachinePointerInfo(), + SDValue Low = DAG.getLoad(PtrVT, DL, Chain, LowAddr, MachinePointerInfo(), false, false, false, 0); - SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, - HighAddr, MachinePointerInfo(), + SDValue High = DAG.getLoad(PtrVT, DL, Chain, HighAddr, MachinePointerInfo(), false, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); @@ -435,8 +436,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LD->getAlignment())) return SDValue(); - unsigned ABIAlignment = getDataLayout()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); + auto &TD = DAG.getDataLayout(); + unsigned ABIAlignment = TD.getABITypeAlignment( + LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned load alone. if (LD->getAlignment() >= ABIAlignment) return SDValue(); @@ -486,7 +488,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = TD.getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -495,10 +497,11 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(DL).setChain(Chain) - .setCallee(CallingConv::C, IntPtrTy, - DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(DL).setChain(Chain).setCallee( + CallingConv::C, IntPtrTy, + DAG.getExternalSymbol("__misaligned_load", + getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -516,8 +519,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const ST->getAlignment())) { return SDValue(); } - unsigned ABIAlignment = getDataLayout()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext())); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( + ST->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned store alone. if (ST->getAlignment() >= ABIAlignment) { return SDValue(); @@ -545,7 +548,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). 
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -557,10 +560,11 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(dl).setChain(Chain).setCallee( + CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__misaligned_store", + getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; @@ -833,9 +837,9 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); int FI = XFI->createLRSpillSlot(MF); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - return DAG.getLoad(getPointerTy(), SDLoc(Op), DAG.getEntryNode(), FIN, - MachinePointerInfo::getFixedStack(FI), false, false, - false, 0); + return DAG.getLoad( + getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN, + MachinePointerInfo::getFixedStack(FI), false, false, false, 0); } SDValue XCoreTargetLowering:: @@ -979,11 +983,10 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { if (N->getMemoryVT() == MVT::i32) { if (N->getAlignment() < 4) report_fatal_error("atomic load must be aligned"); - return DAG.getLoad(getPointerTy(), SDLoc(Op), N->getChain(), - N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getAlignment(), - N->getAAInfo(), N->getRanges()); + return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op), + N->getChain(), N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), N->isInvariant(), + N->getAlignment(), N->getAAInfo(), N->getRanges()); } if (N->getMemoryVT() == MVT::i16) { if (N->getAlignment() < 2) @@ -1150,9 +1153,10 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = RetCCInfo.getNextStackOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, dl, - getPointerTy(), true), dl); + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, dl, PtrVT, true), dl); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; SmallVector<SDValue, 12> MemOpChains; @@ -1239,11 +1243,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. 
- Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, dl, getPointerTy(), - true), - DAG.getConstant(0, dl, getPointerTy(), true), - InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, dl, PtrVT, true), + DAG.getConstant(0, dl, PtrVT, true), InFlag, dl); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we @@ -1830,7 +1831,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (StoreBits % 8) { break; } - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment( + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext())); unsigned Alignment = ST->getAlignment(); if (Alignment >= ABIAlignment) { @@ -1924,15 +1925,13 @@ static inline bool isImmUs4(int64_t val) /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool -XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, - unsigned AS) const { +bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); - const DataLayout *TD = TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(Ty); + unsigned Size = DL.getTypeAllocSize(Ty); if (AM.BaseGV) { return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; @@ -1970,7 +1969,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::pair<unsigned, const TargetRegisterClass *> XCoreTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 97f0494b6fe3..ddd675c5164d 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -101,7 +101,9 @@ namespace llvm { unsigned getJumpTableEncoding() const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override { + return MVT::i32; + } /// LowerOperation - Provide custom lowering hooks for some operations. 
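The XCore hunks above all follow one mechanical pattern: TargetLowering::getPointerTy() and getScalarShiftAmountTy() now take the DataLayout, and SelectionDAG lowering code obtains it from DAG.getDataLayout() instead of going through the TargetMachine. A minimal sketch of the new shape in a hypothetical target; the class, function and variable names below are illustrative and not part of the patch:

SDValue MyTargetLowering::lowerExample(SDValue Op, SelectionDAG &DAG) const {
  const DataLayout &DL = DAG.getDataLayout();            // was: *TM.getDataLayout()
  EVT PtrVT = getPointerTy(DL);                          // was: getPointerTy()
  // e.g. build a libcall signature, as LowerLOAD/LowerSTORE do above.
  Type *IntPtrTy = DL.getIntPtrType(*DAG.getContext());
  (void)IntPtrTy;
  return DAG.getNode(ISD::ADD, SDLoc(Op), PtrVT, Op, Op);
}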
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -120,8 +122,8 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; private: const TargetMachine &TM; @@ -175,8 +177,7 @@ namespace llvm { // Inline asm support std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 1d569e8936df..1cfb57dc3af3 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -222,7 +222,7 @@ XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { XCore::R8, XCore::R9, 0 }; - const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(*MF); if (TFI->hasFP(*MF)) return CalleeSavedRegsFP; return CalleeSavedRegs; @@ -230,7 +230,7 @@ XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(MF); Reserved.set(XCore::CP); Reserved.set(XCore::DP); @@ -270,7 +270,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const XCoreInstrInfo &TII = *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(MF); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); int StackSize = MF.getFrameInfo()->getStackSize(); @@ -324,7 +324,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? 
XCore::R10 : XCore::SP; } diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp index a34884480cea..40568d124de0 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -16,12 +16,6 @@ using namespace llvm; #define DEBUG_TYPE "xcore-selectiondag-info" -XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { -} - SDValue XCoreSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -36,18 +30,20 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), - std::move(Args), 0) - .setDiscardResult(); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__memcpy_4", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h index cfd80b3f3172..77b3527d77e3 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -22,8 +22,6 @@ class XCoreTargetMachine; class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit XCoreSelectionDAGInfo(const DataLayout &DL); - ~XCoreSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index c98518b60225..99ad2c88504f 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -28,4 +28,4 @@ void XCoreSubtarget::anchor() { } XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : XCoreGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this), - TLInfo(TM, *this), TSInfo(*TM.getDataLayout()) {} + TLInfo(TM, *this), TSInfo() {} diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 370b64b26688..f420081868f9 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -85,6 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() { } TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(XCoreTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(XCoreTTIImpl(this, F)); + }); } diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h index 70b47dfa1156..e23aef3e3b4a 100644 --- 
a/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -37,8 +37,9 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> { const XCoreTargetLowering *getTLI() const { return TLI; } public: - explicit XCoreTTIImpl(const XCoreTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. XCoreTTIImpl(const XCoreTTIImpl &Arg) @@ -46,18 +47,6 @@ public: XCoreTTIImpl(XCoreTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - XCoreTTIImpl &operator=(const XCoreTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - XCoreTTIImpl &operator=(XCoreTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } unsigned getNumberOfRegisters(bool Vector) { if (Vector) { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index f75436328252..4762011d63d8 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -825,7 +825,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, V = GetElementPtrInst::Create(SI->first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); - AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. @@ -837,7 +836,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); - AA.copyValue(OrigLoad, Args.back()); } } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 3df17b920a95..336dac45e13a 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMipo BarrierNoopPass.cpp ConstantMerge.cpp DeadArgumentElimination.cpp + ElimAvailExtern.cpp ExtractGV.cpp FunctionAttrs.cpp GlobalDCE.cpp diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 76898f275058..d0447640259e 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -326,7 +326,18 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// instead. bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) { - if (Fn.isDeclaration() || Fn.mayBeOverridden()) + // We cannot change the arguments if this TU does not define the function or + // if the linker may choose a function body from another TU, even if the + // nominal linkage indicates that other copies of the function have the same + // semantics. In the below example, the dead load from %p may not have been + // eliminated from the linker-chosen copy of f, so replacing %p with undef + // in callers may introduce undefined behavior. 
+ // + // define linkonce_odr void @f(i32* %p) { + // %v = load i32 %p + // ret void + // } + if (!Fn.isStrongDefinitionForLinker()) return false; // Functions with local linkage should already have been handled, except the @@ -334,19 +345,6 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) return false; - // If a function seen at compile time is not necessarily the one linked to - // the binary being built, it is illegal to change the actual arguments - // passed to it. These functions can be captured by isWeakForLinker(). - // *NOTE* that mayBeOverridden() is insufficient for this purpose as it - // doesn't include linkage types like AvailableExternallyLinkage and - // LinkOnceODRLinkage. Take link_odr* as an example, it indicates a set of - // *EQUIVALENT* globals that can be merged at link-time. However, the - // semantic of *EQUIVALENT*-functions includes parameters. Changing - // parameters breaks this assumption. - // - if (Fn.isWeakForLinker()) - return false; - if (Fn.use_empty()) return false; diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp new file mode 100644 index 000000000000..67ba72d6a360 --- /dev/null +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -0,0 +1,84 @@ +//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transform is designed to eliminate available external global +// definitions from the program, turning them into declarations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "elim-avail-extern" + +STATISTIC(NumFunctions, "Number of functions removed"); +STATISTIC(NumVariables, "Number of global variables removed"); + +namespace { + struct EliminateAvailableExternally : public ModulePass { + static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternally() : ModulePass(ID) { + initializeEliminateAvailableExternallyPass( + *PassRegistry::getPassRegistry()); + } + + // run - Do the EliminateAvailableExternally pass on the specified module, + // optionally updating the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M) override; + }; +} + +char EliminateAvailableExternally::ID = 0; +INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern", + "Eliminate Available Externally Globals", false, false) + +ModulePass *llvm::createEliminateAvailableExternallyPass() { + return new EliminateAvailableExternally(); +} + +bool EliminateAvailableExternally::runOnModule(Module &M) { + bool Changed = false; + + // Drop initializers of available externally global variables. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (!I->hasAvailableExternallyLinkage()) + continue; + if (I->hasInitializer()) { + Constant *Init = I->getInitializer(); + I->setInitializer(nullptr); + if (isSafeToDestroyConstant(Init)) + Init->destroyConstant(); + } + I->removeDeadConstantUsers(); + I->setLinkage(GlobalValue::ExternalLinkage); + NumVariables++; + } + + // Drop the bodies of available externally functions. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (!I->hasAvailableExternallyLinkage()) + continue; + if (!I->isDeclaration()) + // This will set the linkage to external + I->deleteBody(); + I->removeDeadConstantUsers(); + NumFunctions++; + } + + return Changed; +} diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 2f8c7d9349b9..b9462f2ffc72 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -93,8 +93,11 @@ namespace { makeVisible(*I, Delete); - if (Delete) + if (Delete) { + // Make this a declaration and drop it's comdat. I->setInitializer(nullptr); + I->setComdat(nullptr); + } } // Visit the Functions. @@ -108,8 +111,11 @@ namespace { makeVisible(*I, Delete); - if (Delete) + if (Delete) { + // Make this a declaration and drop it's comdat. I->deleteBody(); + I->setComdat(nullptr); + } } // Visit the Aliases. diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index fcacec3286fa..50f56b0f2afe 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -46,6 +46,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); + initializeEliminateAvailableExternallyPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 963f1bb13aaf..88e5e479136f 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -105,6 +105,7 @@ PassManagerBuilder::PassManagerBuilder() { VerifyInput = false; VerifyOutput = false; MergeFunctions = false; + PrepareForLTO = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -319,8 +320,8 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies - // on the rotated form. - MPM.add(createLoopRotatePass()); + // on the rotated form. Disable header duplication at -Oz. + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. @@ -401,6 +402,17 @@ void PassManagerBuilder::populateModulePassManager( // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { + if (!PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE below, so + // this only impacts referenced available externally globals. 
+ // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead. + MPM.add(createEliminateAvailableExternallyPass()); + } MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 010b7b57c3e7..0bd6fd2f226d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3928,8 +3928,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = - SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I)) + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, + I.getFastMathFlags(), DL, TLI, DT, AC, &I)) return ReplaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 97ea8df757f8..ac934f1bd85c 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H #define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" @@ -177,6 +178,8 @@ private: // Mode in which we are running the combiner. const bool MinimizeSize; + AliasAnalysis *AA; + // Required analyses. // FIXME: These can never be null and should be references. AssumptionCache *AC; @@ -192,10 +195,11 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, - bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI, + bool MinimizeSize, AliasAnalysis *AA, + AssumptionCache *AC, TargetLibraryInfo *TLI, DominatorTree *DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), - AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} + AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e7a45330d955..e3179dbeece8 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -749,10 +749,25 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. 
BasicBlock::iterator BBI = &LI; - if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) + AAMDNodes AATags; + if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, + 6, AA, &AATags)) { + if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) { + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_range, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + }; + combineMetadata(NLI, &LI, KnownIDs); + }; + return ReplaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), LI.getName() + ".cast")); + } // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 24446c8578e0..273047279e90 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -14,6 +14,8 @@ #include "InstCombineInternal.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -60,56 +62,6 @@ static bool CheapToScalarize(Value *V, bool isConstant) { return false; } -/// FindScalarElement - Given a vector and an element number, see if the scalar -/// value is already around as a register, for example if it were inserted then -/// extracted from the vector. -static Value *FindScalarElement(Value *V, unsigned EltNo) { - assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - VectorType *VTy = cast<VectorType>(V->getType()); - unsigned Width = VTy->getNumElements(); - if (EltNo >= Width) // Out of range access. - return UndefValue::get(VTy->getElementType()); - - if (Constant *C = dyn_cast<Constant>(V)) - return C->getAggregateElement(EltNo); - - if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { - // If this is an insert to a variable element, we don't know what it is. - if (!isa<ConstantInt>(III->getOperand(2))) - return nullptr; - unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); - - // If this is an insert to the element we are looking for, return the - // inserted value. - if (EltNo == IIElt) - return III->getOperand(1); - - // Otherwise, the insertelement doesn't modify the value, recurse on its - // vector input. - return FindScalarElement(III->getOperand(0), EltNo); - } - - if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { - unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); - int InEl = SVI->getMaskValue(EltNo); - if (InEl < 0) - return UndefValue::get(VTy->getElementType()); - if (InEl < (int)LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); - } - - // Extract a value from a vector add operation with a constant zero. - Value *Val = nullptr; Constant *Con = nullptr; - if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { - if (Con->getAggregateElement(EltNo)->isNullValue()) - return FindScalarElement(Val, EltNo); - } - - // Otherwise, we don't know. - return nullptr; -} - // If we have a PHI node with a vector type that has only 2 uses: feed // itself and be an operand of extractelement at a constant location, // try to replace the PHI of the vector type with a PHI of a scalar type. 
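The static FindScalarElement removed above has not disappeared: it now lives in llvm/Analysis/VectorUtils.h as findScalarElement, and the constant-vector and out-of-range cases it used to cover are handled up front by SimplifyExtractElementInst (used in the next hunk). A rough sketch of how another client could lean on the shared helpers; the wrapper below is hypothetical and assumes the defaulted analysis arguments of the Simplify* entry points:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
using namespace llvm;

// Hypothetical helper, not part of this patch: return a scalar equivalent of
// `extractelement Vec, Idx`, or null if none is readily available.
static Value *getScalarFor(Value *Vec, Value *Idx, const DataLayout &DL) {
  // Folds constants, undef and out-of-range indices.
  if (Value *V = SimplifyExtractElementInst(Vec, Idx, DL))
    return V;
  // Otherwise look for a scalar that already exists, e.g. the value fed into
  // an insertelement at the same constant index.
  if (auto *CIdx = dyn_cast<ConstantInt>(Idx))
    return findScalarElement(Vec, CIdx->getZExtValue());
  return nullptr;
}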
@@ -178,6 +130,10 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { + if (Value *V = SimplifyExtractElementInst( + EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC)) + return ReplaceInstUsesWith(EI, V); + // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) @@ -190,10 +146,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - // If this is extracting an invalid index, turn this into undef, to avoid - // crashing the code below. - if (IndexVal >= VectorWidth) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + // InstSimplify handles cases where the index is invalid. + assert(IndexVal < VectorWidth); // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use @@ -209,16 +163,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } - if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) - return ReplaceInstUsesWith(EI, Elt); - // If the this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) - if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) + if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 2a81689f7449..fd34a244f271 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2174,16 +2174,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return ReplaceInstUsesWith(EV, Agg); - if (Constant *C = dyn_cast<Constant>(Agg)) { - if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) { - if (EV.getNumIndices() == 0) - return ReplaceInstUsesWith(EV, C2); - // Extract the remaining indices out of the constant indexed by the - // first index - return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); - } - return nullptr; // Can't handle other constants - } + if (Value *V = + SimplifyExtractValueInst(Agg, EV.getIndices(), DL, TLI, DT, AC)) + return ReplaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { // We're extracting from an insertvalue instruction, compare the indices @@ -2972,8 +2965,9 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, - AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, LoopInfo *LI = nullptr) { + AliasAnalysis *AA, AssumptionCache &AC, + TargetLibraryInfo &TLI, DominatorTree &DT, + LoopInfo *LI = nullptr) { // Minimizing size? 
bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); auto &DL = F.getParent()->getDataLayout(); @@ -2998,7 +2992,8 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist)) Changed = true; - InstCombiner IC(Worklist, &Builder, MinimizeSize, &AC, &TLI, &DT, DL, LI); + InstCombiner IC(Worklist, &Builder, MinimizeSize, + AA, &AC, &TLI, &DT, DL, LI); if (IC.run()) Changed = true; @@ -3017,7 +3012,8 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *LI = AM->getCachedResult<LoopAnalysis>(F); - if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI)) + // FIXME: The AliasAnalysis is not yet supported in the new pass manager + if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3050,6 +3046,7 @@ public: void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); @@ -3061,6 +3058,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { return false; // Required analyses. + auto AA = &getAnalysis<AliasAnalysis>(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -3069,7 +3067,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI); + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI); } char InstructionCombiningPass::ID = 0; @@ -3078,6 +3076,7 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 60903c8b4aaf..d1eba6e70e57 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -656,11 +656,14 @@ namespace { LeaderTableEntry* Prev = nullptr; LeaderTableEntry* Curr = &LeaderTable[N]; - while (Curr->Val != I || Curr->BB != BB) { + while (Curr && (Curr->Val != I || Curr->BB != BB)) { Prev = Curr; Curr = Curr->Next; } + if (!Curr) + return; + if (Prev) { Prev->Next = Curr->Next; } else { @@ -1304,11 +1307,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (V->getType()->getScalarType()->isPointerTy()) { AliasAnalysis *AA = gvn.getAliasAnalysis(); - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - AA->copyValue(LI, NewPHIs[i]); - - // Now that we've copied information to the new PHIs, scan through - // them again and inform alias analysis that we've added potentially + // Scan the new PHIs and inform alias analysis that we've added potentially // escaping uses to any values that are operands to these PHIs. 
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { PHINode *P = NewPHIs[i]; @@ -1796,7 +1795,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // In general, GVN unifies expressions over different control-flow // regions, and so we need a conservative combination of the noalias // scopes. - unsigned KnownIDs[] = { + static const unsigned KnownIDs[] = { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6f0375487af6..2a954d9961f2 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -135,6 +136,10 @@ namespace { PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); + + Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, + Instruction *InsertPt, Type *Ty, + bool &IsHighCostExpansion); }; } @@ -496,6 +501,52 @@ struct RewritePhi { }; } +Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, + Loop *L, Instruction *InsertPt, + Type *ResultTy, + bool &IsHighCostExpansion) { + using namespace llvm::PatternMatch; + + if (!Rewriter.isHighCostExpansion(S, L)) { + IsHighCostExpansion = false; + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); + } + + // Before expanding S into an expensive LLVM expression, see if we can use an + // already existing value as the expansion for S. There is potential to make + // this significantly smarter, but this simple heuristic already gets some + // interesting cases. + + SmallVector<BasicBlock *, 4> Latches; + L->getLoopLatches(Latches); + + for (BasicBlock *BB : Latches) { + ICmpInst::Predicate Pred; + Instruction *LHS, *RHS; + BasicBlock *TrueBB, *FalseBB; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), + TrueBB, FalseBB))) + continue; + + if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) { + IsHighCostExpansion = false; + return LHS; + } + + if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) { + IsHighCostExpansion = false; + return RHS; + } + } + + // We didn't find anything, fall back to using SCEVExpander. + assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!"); + IsHighCostExpansion = true; + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -628,7 +679,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { continue; } - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); + bool HighCost = false; + Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, + PN->getType(), HighCost); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); @@ -637,7 +690,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { DeadInsts.push_back(ExitVal); continue; } - bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L); // Collect all the candidate PHINodes to be rewritten. 
RewritePhiSet.push_back( diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index f0e6d641b180..43fc50e588f8 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -602,7 +602,8 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. while (!I.use_empty()) { - Instruction *User = I.user_back(); + Value::user_iterator UI = I.user_begin(); + auto *User = cast<Instruction>(*UI); if (!DT->isReachableFromEntry(User->getParent())) { User->replaceUsesOfWith(&I, UndefValue::get(I.getType())); continue; @@ -610,6 +611,16 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, // The user must be a PHI node. PHINode *PN = cast<PHINode>(User); + // Surprisingly, instructions can be used outside of loops without any + // exits. This can only happen in PHI nodes if the incoming block is + // unreachable. + Use &U = UI.getUse(); + BasicBlock *BB = PN->getIncomingBlock(U); + if (!DT->isReachableFromEntry(BB)) { + U = UndefValue::get(I.getType()); + continue; + } + BasicBlock *ExitBlock = PN->getParent(); assert(ExitBlockSet.count(ExitBlock) && "The LCSSA PHI is not in an exit block!"); diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index 0325d268c325..1b9859b57790 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" #include <list> #define LDIST_NAME "loop-distribute" @@ -55,70 +56,6 @@ static cl::opt<bool> DistributeNonIfConvertible( STATISTIC(NumLoopsDistributed, "Number of loops distributed"); -/// \brief Remaps instructions in a loop including the preheader. -static void remapInstructionsInLoop(const SmallVectorImpl<BasicBlock *> &Blocks, - ValueToValueMapTy &VMap) { - // Rewrite the code to refer to itself. - for (auto *BB : Blocks) - for (auto &Inst : *BB) - RemapInstruction(&Inst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); -} - -/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p -/// Blocks. -/// -/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block -/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. -static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, - Loop *OrigLoop, ValueToValueMapTy &VMap, - const Twine &NameSuffix, LoopInfo *LI, - DominatorTree *DT, - SmallVectorImpl<BasicBlock *> &Blocks) { - Function *F = OrigLoop->getHeader()->getParent(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - - Loop *NewLoop = new Loop(); - if (ParentLoop) - ParentLoop->addChildLoop(NewLoop); - else - LI->addTopLevelLoop(NewLoop); - - BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); - BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); - // To rename the loop PHIs. - VMap[OrigPH] = NewPH; - Blocks.push_back(NewPH); - - // Update LoopInfo. - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewPH, *LI); - - // Update DominatorTree. - DT->addNewBlock(NewPH, LoopDomBB); - - for (BasicBlock *BB : OrigLoop->getBlocks()) { - BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); - VMap[BB] = NewBB; - - // Update LoopInfo. 
- NewLoop->addBasicBlockToLoop(NewBB, *LI); - - // Update DominatorTree. - BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); - DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); - - Blocks.push_back(NewBB); - } - - // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), - NewLoop->getHeader(), F->end()); - - return NewLoop; -} - namespace { /// \brief Maintains the set of instructions of the loop for a partition before /// cloning. After cloning, it hosts the new loop. @@ -204,7 +141,9 @@ public: ValueToValueMapTy &getVMap() { return VMap; } /// \brief Remaps the cloned instructions using VMap. - void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); } + void remapInstructions() { + remapInstructionsInBlocks(ClonedLoopBlocks, VMap); + } /// \brief Based on the set of instructions selected for this partition, /// removes the unnecessary ones. @@ -493,15 +432,14 @@ public: /// partitions its entry is set to -1. SmallVector<int, 8> computePartitionSetForPointers(const LoopAccessInfo &LAI) { - const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck = - LAI.getRuntimePointerCheck(); + const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking(); unsigned N = RtPtrCheck->Pointers.size(); SmallVector<int, 8> PtrToPartitions(N); for (unsigned I = 0; I < N; ++I) { - Value *Ptr = RtPtrCheck->Pointers[I]; + Value *Ptr = RtPtrCheck->Pointers[I].PointerValue; auto Instructions = - LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]); + LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr); int &Partition = PtrToPartitions[I]; // First set it to uninitialized. @@ -629,121 +567,6 @@ private: AccessesType Accesses; }; -/// \brief Handles the loop versioning based on memchecks. -class LoopVersioning { -public: - LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector<int, 8> *PtrToPartition = nullptr) - : VersionedLoop(L), NonVersionedLoop(nullptr), - PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {} - - /// \brief Returns true if we need memchecks to disambiguate may-aliasing - /// accesses. - bool needsRuntimeChecks() const { - return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition); - } - - /// \brief Performs the CFG manipulation part of versioning the loop including - /// the DominatorTree and LoopInfo updates. - void versionLoop(Pass *P) { - Instruction *FirstCheckInst; - Instruction *MemRuntimeCheck; - // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); - std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); - assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); - - // Rename the block to make the IR more readable. - MemCheckBB->setName(VersionedLoop->getHeader()->getName() + - ".lver.memcheck"); - - // Create empty preheader for the loop (and after cloning for the - // non-versioned loop). - BasicBlock *PH = - SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); - PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); - - // Clone the loop including the preheader. - // - // FIXME: This does not currently preserve SimplifyLoop because the exit - // block is a join between the two loops. 
- SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; - NonVersionedLoop = - cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, - ".lver.orig", LI, DT, NonVersionedLoopBlocks); - remapInstructionsInLoop(NonVersionedLoopBlocks, VMap); - - // Insert the conditional branch based on the result of the memchecks. - Instruction *OrigTerm = MemCheckBB->getTerminator(); - BranchInst::Create(NonVersionedLoop->getLoopPreheader(), - VersionedLoop->getLoopPreheader(), MemRuntimeCheck, - OrigTerm); - OrigTerm->eraseFromParent(); - - // The loops merge in the original exit block. This is now dominated by the - // memchecking block. - DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); - } - - /// \brief Adds the necessary PHI nodes for the versioned loops based on the - /// loop-defined values used outside of the loop. - void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) { - BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); - assert(PHIBlock && "No single successor to loop exit block"); - - for (auto *Inst : DefsUsedOutside) { - auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]); - PHINode *PN; - - // First see if we have a single-operand PHI with the value defined by the - // original loop. - for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { - assert(PN->getNumOperands() == 1 && - "Exit block should only have on predecessor"); - if (PN->getIncomingValue(0) == Inst) - break; - } - // If not create it. - if (!PN) { - PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", - PHIBlock->begin()); - for (auto *User : Inst->users()) - if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) - User->replaceUsesOfWith(Inst, PN); - PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); - } - // Add the new incoming value from the non-versioned loop. - PN->addIncoming(NonVersionedLoopInst, - NonVersionedLoop->getExitingBlock()); - } - } - -private: - /// \brief The original loop. This becomes the "versioned" one, i.e. control - /// goes if the memchecks all pass. - Loop *VersionedLoop; - /// \brief The fall-back loop, i.e. if any of the memchecks fail. - Loop *NonVersionedLoop; - - /// \brief For each memory pointer it contains the partitionId it is used in. - /// If nullptr, no partitioning is used. - /// - /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck(). - /// If the pointer is used in multiple partitions the entry is set to -1. - const SmallVector<int, 8> *PtrToPartition; - - /// \brief This maps the instructions from VersionedLoop to their counterpart - /// in NonVersionedLoop. - ValueToValueMapTy VMap; - - /// \brief Analyses used. - const LoopAccessInfo &LAI; - LoopInfo *LI; - DominatorTree *DT; -}; - /// \brief Returns the instructions that use values defined in the loop. 
static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; @@ -929,7 +752,7 @@ private: LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition); if (LVer.needsRuntimeChecks()) { DEBUG(dbgs() << "\nPointers:\n"); - DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition)); + DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition)); LVer.versionLoop(this); LVer.addPHINodes(DefsUsedOutside); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 714ce914a8b3..a21ca2417ca1 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -508,7 +508,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst, ICmpInst *NewPreCond = cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); - PreCond->replaceAllUsesWith(NewPreCond); + PreCondBr->setCondition(NewPreCond); RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI); } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 25546553fd4d..9d7e57ffebac 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -282,21 +282,21 @@ static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { DEBUG(dbgs() << "Calling populateWorklist called\n"); LoopVector LoopList; Loop *CurrentLoop = &L; - std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector(); - while (vec.size() != 0) { + const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); + while (!Vec->empty()) { // The current loop has multiple subloops in it hence it is not tightly // nested. // Discard all loops above it added into Worklist. - if (vec.size() != 1) { + if (Vec->size() != 1) { LoopList.clear(); return; } LoopList.push_back(CurrentLoop); - CurrentLoop = *(vec.begin()); - vec = CurrentLoop->getSubLoopsVector(); + CurrentLoop = Vec->front(); + Vec = &CurrentLoop->getSubLoops(); } LoopList.push_back(CurrentLoop); - V.push_back(LoopList); + V.push_back(std::move(LoopList)); } static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 9e7558d9c45f..d78db6c369b3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -840,8 +840,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; - if (HasRuntimeUnrollDisablePragma(L)) { + bool AllowRuntime = + (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime); + // Don't unroll a runtime trip count loop with unroll full pragma. + if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 243db8d70ca2..643f3740eedd 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -301,10 +301,6 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB, // Merged instruction Instruction *HoistedInst = HoistCand->clone(); - // Notify AA of the new value. - if (isa<LoadInst>(HoistCand)) - AA->copyValue(HoistCand, HoistedInst); - // Hoist instruction. 
HoistedInst->insertBefore(HoistPt); @@ -451,9 +447,6 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, NewPN->addIncoming(Opd1, S0->getParent()); NewPN->addIncoming(Opd2, S1->getParent()); if (NewPN->getType()->getScalarType()->isPointerTy()) { - // Notify AA of the new value. - AA->copyValue(Opd1, NewPN); - AA->copyValue(Opd2, NewPN); // AA needs to be informed when a PHI-use of the pointer value is added for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) { unsigned J = PHINode::getOperandNumForIncomingValue(I); @@ -491,7 +484,6 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0, // Create the new store to be inserted at the join point. StoreInst *SNew = (StoreInst *)(S0->clone()); Instruction *ANew = A0->clone(); - AA->copyValue(S0, SNew); SNew->insertBefore(InsertPt); ANew->insertBefore(SNew); diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 9ecaf102574a..366301ad731a 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -399,8 +399,8 @@ static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) { // at least if they do, are leaf functions that cause only finite stack // growth. In particular, the optimizer likes to form things like memsets // out of stores in the original IR. Another important example is - // llvm.frameescape which must occur in the entry block. Inserting a - // safepoint before it is not legal since it could push the frameescape + // llvm.localescape which must occur in the entry block. Inserting a + // safepoint before it is not legal since it could push the localescape // out of the entry block. return true; } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 305175ff8f73..4d3a708fa20e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1799,11 +1799,10 @@ bool IPSCCP::runOnModule(Module &M) { if (!TI->use_empty()) TI->replaceAllUsesWith(UndefValue::get(TI->getType())); TI->eraseFromParent(); + new UnreachableInst(M.getContext(), BB); if (&*BB != &F->front()) BlocksToErase.push_back(BB); - else - new UnreachableInst(M.getContext(), BB); continue; } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 056dd11b5ab3..d1a0a82b9b08 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2593,13 +2593,21 @@ private: V = rewriteIntegerLoad(LI); } else if (NewBeginOffset == NewAllocaBeginOffset && canConvertValue(DL, NewAllocaTy, LI.getType())) { - V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), - LI.getName()); + LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), LI.getName()); + if (LI.isVolatile()) + NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + + V = NewLI; } else { Type *LTy = TargetTy->getPointerTo(); - V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), - getSliceAlign(TargetTy), LI.isVolatile(), - LI.getName()); + LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), + getSliceAlign(TargetTy), + LI.isVolatile(), LI.getName()); + if (LI.isVolatile()) + NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + + V = NewLI; IsPtrAdjusted = true; } V = convertValue(DL, IRB, V, TargetTy); @@ -2722,7 +2730,8 @@ private: NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()), SI.isVolatile()); } - (void)NewSI; + if (SI.isVolatile()) + 
NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 53471de6154c..ef7dacac79cb 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -440,8 +440,6 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - if (AA) - AA->copyValue(PN, NewPHI); // NOTE! This loop walks backwards for a reason! First off, this minimizes // the cost of removal if we end up removing a large number of values, and diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 470e2d09132e..716e655affb9 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMTransformUtils LoopUnroll.cpp LoopUnrollRuntime.cpp LoopUtils.cpp + LoopVersioning.cpp LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 4f8d1dfbe5df..cc4d6c6fb192 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -720,3 +721,68 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, nullptr); } + +/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. +void llvm::remapInstructionsInBlocks( + const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) { + // Rewrite the code to refer to itself. + for (auto *BB : Blocks) + for (auto &Inst : *BB) + RemapInstruction(&Inst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); +} + +/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p +/// Blocks. +/// +/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block +/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. +Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, + Loop *OrigLoop, ValueToValueMapTy &VMap, + const Twine &NameSuffix, LoopInfo *LI, + DominatorTree *DT, + SmallVectorImpl<BasicBlock *> &Blocks) { + Function *F = OrigLoop->getHeader()->getParent(); + Loop *ParentLoop = OrigLoop->getParentLoop(); + + Loop *NewLoop = new Loop(); + if (ParentLoop) + ParentLoop->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + + BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); + assert(OrigPH && "No preheader"); + BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); + // To rename the loop PHIs. + VMap[OrigPH] = NewPH; + Blocks.push_back(NewPH); + + // Update LoopInfo. + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, *LI); + + // Update DominatorTree. + DT->addNewBlock(NewPH, LoopDomBB); + + for (BasicBlock *BB : OrigLoop->getBlocks()) { + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); + VMap[BB] = NewBB; + + // Update LoopInfo. + NewLoop->addBasicBlockToLoop(NewBB, *LI); + + // Update DominatorTree. 
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); + + Blocks.push_back(NewBB); + } + + // Move them physically from the end of the block list. + F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); + F->getBasicBlockList().splice(Before, F->getBasicBlockList(), + NewLoop->getHeader(), F->end()); + + return NewLoop; +} diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 56085579b61c..50ca6234d0b7 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -900,13 +900,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, if (auto *GO = dyn_cast<GlobalObject>(V)) { // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (GO->isDeclaration()) - return Align; - // If the memory we set aside for the global may not be the memory used by - // the final program then it is impossible for us to reliably enforce the - // preferred alignment. - if (GO->isWeakForLinker()) + // of the global. If the memory we set aside for the global may not be the + // memory used by the final program then it is impossible for us to reliably + // enforce the preferred alignment. + if (!GO->isStrongDefinitionForLinker()) return Align; if (GO->getAlignment() >= PrefAlign) diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 2e7d21cb171f..5c98043e4632 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -403,7 +403,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); - if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp new file mode 100644 index 000000000000..832079d2cf63 --- /dev/null +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -0,0 +1,106 @@ +//===- LoopVersioning.cpp - Utility to version a loop ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a utility class to perform loop versioning. The versioned +// loop speculates that otherwise may-aliasing memory accesses don't overlap and +// emits checks to prove this. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" + +using namespace llvm; + +LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, + DominatorTree *DT, + const SmallVector<int, 8> *PtrToPartition) + : VersionedLoop(L), NonVersionedLoop(nullptr), + PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) { + assert(L->getExitBlock() && "No single exit block"); + assert(L->getLoopPreheader() && "No preheader"); +} + +bool LoopVersioning::needsRuntimeChecks() const { + return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition); +} + +void LoopVersioning::versionLoop(Pass *P) { + Instruction *FirstCheckInst; + Instruction *MemRuntimeCheck; + // Add the memcheck in the original preheader (this is empty initially). + BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); + std::tie(FirstCheckInst, MemRuntimeCheck) = + LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); + assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); + + // Rename the block to make the IR more readable. + MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck"); + + // Create empty preheader for the loop (and after cloning for the + // non-versioned loop). + BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); + PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); + + // Clone the loop including the preheader. + // + // FIXME: This does not currently preserve SimplifyLoop because the exit + // block is a join between the two loops. + SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; + NonVersionedLoop = + cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig", + LI, DT, NonVersionedLoopBlocks); + remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap); + + // Insert the conditional branch based on the result of the memchecks. + Instruction *OrigTerm = MemCheckBB->getTerminator(); + BranchInst::Create(NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader(), MemRuntimeCheck, + OrigTerm); + OrigTerm->eraseFromParent(); + + // The loops merge in the original exit block. This is now dominated by the + // memchecking block. + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); +} + +void LoopVersioning::addPHINodes( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); + assert(PHIBlock && "No single successor to loop exit block"); + + for (auto *Inst : DefsUsedOutside) { + auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]); + PHINode *PN; + + // First see if we have a single-operand PHI with the value defined by the + // original loop. + for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { + assert(PN->getNumOperands() == 1 && + "Exit block should only have on predecessor"); + if (PN->getIncomingValue(0) == Inst) + break; + } + // If not create it. 
+ if (!PN) { + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", + PHIBlock->begin()); + for (auto *User : Inst->users()) + if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) + User->replaceUsesOfWith(Inst, PN); + PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); + } + // Add the new incoming value from the non-versioned loop. + PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock()); + } +} diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 5ba14174ac79..69ca2688c810 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -148,8 +148,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor( cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)); -/// We don't unroll loops with a known constant trip count below this number. -static const unsigned TinyTripCountUnrollThreshold = 128; +/// We don't interleave loops with a known constant trip count below this +/// number. +static const unsigned TinyTripCountInterleaveThreshold = 128; static cl::opt<unsigned> ForceTargetNumScalarRegs( "force-target-num-scalar-regs", cl::init(0), cl::Hidden, @@ -180,7 +181,8 @@ static cl::opt<unsigned> ForceTargetInstructionCost( static cl::opt<unsigned> SmallLoopCost( "small-loop-cost", cl::init(20), cl::Hidden, - cl::desc("The cost of a loop that is considered 'small' by the unroller.")); + cl::desc( + "The cost of a loop that is considered 'small' by the interleaver.")); static cl::opt<bool> LoopVectorizeWithBlockFrequency( "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, @@ -188,10 +190,11 @@ static cl::opt<bool> LoopVectorizeWithBlockFrequency( "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")); -// Runtime unroll loops for load/store throughput. -static cl::opt<bool> EnableLoadStoreRuntimeUnroll( - "enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, - cl::desc("Enable runtime unrolling until load/store ports are saturated")); +// Runtime interleave loops for load/store throughput. +static cl::opt<bool> EnableLoadStoreRuntimeInterleave( + "enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, + cl::desc( + "Enable runtime interleaving until load/store ports are saturated")); /// The number of stores in a loop that are allowed to need predication. 
static cl::opt<unsigned> NumberOfStoresToPredicate( @@ -200,15 +203,15 @@ static cl::opt<unsigned> NumberOfStoresToPredicate( static cl::opt<bool> EnableIndVarRegisterHeur( "enable-ind-var-reg-heur", cl::init(true), cl::Hidden, - cl::desc("Count the induction variable only once when unrolling")); + cl::desc("Count the induction variable only once when interleaving")); static cl::opt<bool> EnableCondStoresVectorization( "enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")); -static cl::opt<unsigned> MaxNestedScalarReductionUF( - "max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden, - cl::desc("The maximum unroll factor to use when unrolling a scalar " +static cl::opt<unsigned> MaxNestedScalarReductionIC( + "max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, + cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); namespace { @@ -921,8 +924,8 @@ public: bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } /// Returns the information that we collected about runtime memory check. - const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const { - return LAI->getRuntimePointerCheck(); + const RuntimePointerChecking *getRuntimePointerChecking() const { + return LAI->getRuntimePointerChecking(); } const LoopAccessInfo *getLAI() const { @@ -1105,12 +1108,19 @@ public: /// 64 bit loop indices. unsigned getWidestType(); + /// \return The desired interleave count. + /// If interleave count has been specified by metadata it will be returned. + /// Otherwise, the interleave count is computed and returned. VF and LoopCost + /// are the selected vectorization factor and the cost of the selected VF. + unsigned selectInterleaveCount(bool OptForSize, unsigned VF, + unsigned LoopCost); + /// \return The most profitable unroll factor. - /// If UserUF is non-zero then this method finds the best unroll-factor - /// based on register pressure and other parameters. - /// VF and LoopCost are the selected vectorization factor and the cost of the - /// selected VF. - unsigned selectUnrollFactor(bool OptForSize, unsigned VF, unsigned LoopCost); + /// This method finds the best unroll-factor based on register pressure and + /// other parameters. VF and LoopCost are the selected vectorization factor + /// and the cost of the selected VF. + unsigned computeInterleaveCount(bool OptForSize, unsigned VF, + unsigned LoopCost); /// \brief A struct that represents some properties of the register usage /// of a loop. @@ -1456,9 +1466,14 @@ struct LoopVectorize : public FunctionPass { const BranchProbability ColdProb(1, 5); // 20% ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; - // If the target claims to have no vector registers don't attempt - // vectorization. - if (!TTI->getNumberOfRegisters(true)) + // Don't attempt if + // 1. the target claims to have no vector registers, and + // 2. interleaving won't help ILP. + // + // The second condition is necessary because, even if the target has no + // vector registers, loop vectorization may still enable scalar + // interleaving. + if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) return false; // Build up a worklist of inner-loops to vectorize. 
This is necessary as @@ -1633,18 +1648,17 @@ struct LoopVectorize : public FunctionPass { const LoopVectorizationCostModel::VectorizationFactor VF = CM.selectVectorizationFactor(OptForSize); - // Select the unroll factor. - const unsigned UF = - CM.selectUnrollFactor(OptForSize, VF.Width, VF.Cost); + // Select the interleave count. + unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n'); + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n"); - if (UF == 1) { + if (IC == 1) { emitOptimizationRemarkAnalysis( F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), "not beneficial to vectorize and user disabled interleaving"); @@ -1654,17 +1668,14 @@ struct LoopVectorize : public FunctionPass { // Report the unrolling decision. emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - Twine("unrolled with interleaving factor " + - Twine(UF) + + Twine("interleaved by " + Twine(IC) + " (vectorization not beneficial)")); - // We decided not to vectorize, but we may want to unroll. - - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF); + InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); Unroller.vectorize(&LVL); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF); + InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC); LB.vectorize(&LVL); ++LoopsVectorized; @@ -1675,10 +1686,10 @@ struct LoopVectorize : public FunctionPass { AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. - emitOptimizationRemark( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) + - ", unrolling interleave factor: " + Twine(UF) + ")"); + emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("vectorized loop (vectorization width: ") + + Twine(VF.Width) + ", interleaved count: " + + Twine(IC) + ")"); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -1760,31 +1771,6 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, return Builder.CreateAdd(Val, Step, "induction"); } -/// \brief Find the operand of the GEP that should be checked for consecutive -/// stores. This ignores trailing indices that have no effect on the final -/// pointer. -static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) { - const DataLayout &DL = Gep->getModule()->getDataLayout(); - unsigned LastOperand = Gep->getNumOperands() - 1; - unsigned GEPAllocSize = DL.getTypeAllocSize( - cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); - - // Walk backwards and try to peel off zeros. - while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { - // Find the type we're currently indexing into. - gep_type_iterator GEPTI = gep_type_begin(Gep); - std::advance(GEPTI, LastOperand - 1); - - // If it's a type with the same allocation size as the result of the GEP we - // can peel off the zero index. 
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) - break; - --LastOperand; - } - - return LastOperand; -} - int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to structs. @@ -2503,9 +2489,9 @@ void InnerLoopVectorizer::createEmptyLoop() { */ BasicBlock *OldBasicBlock = OrigLoop->getHeader(); - BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); + BasicBlock *VectorPH = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); - assert(BypassBlock && "Invalid loop structure"); + assert(VectorPH && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); // Some loops have a single integer induction variable, while other loops @@ -2545,44 +2531,35 @@ void InnerLoopVectorizer::createEmptyLoop() { // loop. Value *BackedgeCount = Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(), - BypassBlock->getTerminator()); + VectorPH->getTerminator()); if (BackedgeCount->getType()->isPointerTy()) BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy, "backedge.ptrcnt.to.int", - BypassBlock->getTerminator()); + VectorPH->getTerminator()); Instruction *CheckBCOverflow = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount, Constant::getAllOnesValue(BackedgeCount->getType()), - "backedge.overflow", BypassBlock->getTerminator()); + "backedge.overflow", VectorPH->getTerminator()); // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. - Builder.SetInsertPoint(BypassBlock->getTerminator()); - Value *StartIdx = ExtendedIdx = OldInduction ? - Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock), - IdxTy): - ConstantInt::get(IdxTy, 0); - - // We need an instruction to anchor the overflow check on. StartIdx needs to - // be defined before the overflow check branch. Because the scalar preheader - // is going to merge the start index and so the overflow branch block needs to - // contain a definition of the start index. - Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd( - StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor", - BypassBlock->getTerminator()); + Builder.SetInsertPoint(VectorPH->getTerminator()); + Value *StartIdx = ExtendedIdx = + OldInduction + ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH), + IdxTy) + : ConstantInt::get(IdxTy, 0); // Count holds the overall loop count (N). Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - BypassBlock->getTerminator()); + VectorPH->getTerminator()); - LoopBypassBlocks.push_back(BypassBlock); + LoopBypassBlocks.push_back(VectorPH); // Split the single block loop into the two loop structure described above. 
- BasicBlock *VectorPH = - BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); BasicBlock *VecBody = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = @@ -2597,7 +2574,6 @@ void InnerLoopVectorizer::createEmptyLoop() { if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, *LI); - ParentLoop->addBasicBlockToLoop(VectorPH, *LI); ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); @@ -2615,9 +2591,20 @@ void InnerLoopVectorizer::createEmptyLoop() { // times the unroll factor (num of SIMD instructions). Constant *Step = ConstantInt::get(IdxTy, VF * UF); + // Generate code to check that the loop's trip count that we computed by + // adding one to the backedge-taken count will not overflow. + BasicBlock *NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + ReplaceInstWithInst( + VectorPH->getTerminator(), + BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow)); + VectorPH = NewVectorPH; + // This is the IR builder that we use to add all of the logic for bypassing // the new vector loop. - IRBuilder<> BypassBuilder(BypassBlock->getTerminator()); + IRBuilder<> BypassBuilder(VectorPH->getTerminator()); setDebugLocFromInst(BypassBuilder, getDebugLocFromInstOrOperands(OldInduction)); @@ -2646,24 +2633,14 @@ void InnerLoopVectorizer::createEmptyLoop() { // jump to the scalar loop. Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); - - BasicBlock *LastBypassBlock = BypassBlock; - - // Generate code to check that the loops trip count that we computed by adding - // one to the backedge-taken count will not overflow. - { - auto PastOverflowCheck = - std::next(BasicBlock::iterator(OverflowCheckAnchor)); - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); - ReplaceInstWithInst( - LastBypassBlock->getTerminator(), - BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow)); - LastBypassBlock = CheckBlock; - } + NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + LoopBypassBlocks.push_back(VectorPH); + ReplaceInstWithInst(VectorPH->getTerminator(), + BranchInst::Create(MiddleBlock, NewVectorPH, Cmp)); + VectorPH = NewVectorPH; // Generate the code to check that the strides we assumed to be one are really // one. We want the new basic block to start at the first instruction in a @@ -2671,23 +2648,24 @@ void InnerLoopVectorizer::createEmptyLoop() { Instruction *StrideCheck; Instruction *FirstCheckInst; std::tie(FirstCheckInst, StrideCheck) = - addStrideCheck(LastBypassBlock->getTerminator()); + addStrideCheck(VectorPH->getTerminator()); if (StrideCheck) { AddedSafetyChecks = true; // Create a new block containing the stride check. 
- BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); + VectorPH->setName("vector.stridecheck"); + NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + LoopBypassBlocks.push_back(VectorPH); // Replace the branch into the memory check block with a conditional branch // for the "few elements case". - ReplaceInstWithInst(LastBypassBlock->getTerminator(), - BranchInst::Create(MiddleBlock, CheckBlock, Cmp)); + ReplaceInstWithInst( + VectorPH->getTerminator(), + BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck)); - Cmp = StrideCheck; - LastBypassBlock = CheckBlock; + VectorPH = NewVectorPH; } // Generate the code that checks in runtime if arrays overlap. We put the @@ -2695,28 +2673,26 @@ void InnerLoopVectorizer::createEmptyLoop() { // faster. Instruction *MemRuntimeCheck; std::tie(FirstCheckInst, MemRuntimeCheck) = - Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator()); + Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator()); if (MemRuntimeCheck) { AddedSafetyChecks = true; // Create a new block containing the memory check. - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck"); + VectorPH->setName("vector.memcheck"); + NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + LoopBypassBlocks.push_back(VectorPH); // Replace the branch into the memory check block with a conditional branch // for the "few elements case". - ReplaceInstWithInst(LastBypassBlock->getTerminator(), - BranchInst::Create(MiddleBlock, CheckBlock, Cmp)); + ReplaceInstWithInst( + VectorPH->getTerminator(), + BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck)); - Cmp = MemRuntimeCheck; - LastBypassBlock = CheckBlock; + VectorPH = NewVectorPH; } - ReplaceInstWithInst(LastBypassBlock->getTerminator(), - BranchInst::Create(MiddleBlock, VectorPH, Cmp)); - // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. @@ -3831,7 +3807,7 @@ bool LoopVectorizationLegality::canVectorize() { } // We can only vectorize innermost loops. - if (!TheLoop->getSubLoopsVector().empty()) { + if (!TheLoop->empty()) { emitAnalysis(VectorizationReport() << "loop is not the innermost loop"); return false; } @@ -3897,10 +3873,11 @@ bool LoopVectorizationLegality::canVectorize() { // Collect all of the variables that remain uniform after vectorization. collectLoopUniforms(); - DEBUG(dbgs() << "LV: We can vectorize this loop" << - (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" : - "") - <<"!\n"); + DEBUG(dbgs() << "LV: We can vectorize this loop" + << (LAI->getRuntimePointerChecking()->Need + ? " (with a runtime bound check)" + : "") + << "!\n"); // Analyze interleaved memory accesses. if (EnableInterleavedMemAccesses) @@ -4130,118 +4107,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return true; } -///\brief Remove GEPs whose indices but the last one are loop invariant and -/// return the induction operand of the gep pointer. 
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP) - return Ptr; - - unsigned InductionOperand = getGEPInductionOperand(GEP); - - // Check that all of the gep indices are uniform except for our induction - // operand. - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) - if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) - return Ptr; - return GEP->getOperand(InductionOperand); -} - -///\brief Look for a cast use of the passed value. -static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { - Value *UniqueCast = nullptr; - for (User *U : Ptr->users()) { - CastInst *CI = dyn_cast<CastInst>(U); - if (CI && CI->getType() == Ty) { - if (!UniqueCast) - UniqueCast = CI; - else - return nullptr; - } - } - return UniqueCast; -} - -///\brief Get the stride of a pointer access in a loop. -/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a -/// pointer to the Value, or null otherwise. -static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { - const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); - if (!PtrTy || PtrTy->isAggregateType()) - return nullptr; - - // Try to remove a gep instruction to make the pointer (actually index at this - // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the - // pointer, otherwise, we are analyzing the index. - Value *OrigPtr = Ptr; - - // The size of the pointer access. - int64_t PtrAccessSize = 1; - - Ptr = stripGetElementPtr(Ptr, SE, Lp); - const SCEV *V = SE->getSCEV(Ptr); - - if (Ptr != OrigPtr) - // Strip off casts. - while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) - V = C->getOperand(); - - const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); - if (!S) - return nullptr; - - V = S->getStepRecurrence(*SE); - if (!V) - return nullptr; - - // Strip off the size of access multiplication if we are still analyzing the - // pointer. - if (OrigPtr == Ptr) { - const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); - DL.getTypeAllocSize(PtrTy->getElementType()); - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { - if (M->getOperand(0)->getSCEVType() != scConstant) - return nullptr; - - const APInt &APStepVal = - cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue(); - - // Huge step value - give up. - if (APStepVal.getBitWidth() > 64) - return nullptr; - - int64_t StepVal = APStepVal.getSExtValue(); - if (PtrAccessSize != StepVal) - return nullptr; - V = M->getOperand(1); - } - } - - // Strip off casts. - Type *StripedOffRecurrenceCast = nullptr; - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) { - StripedOffRecurrenceCast = C->getType(); - V = C->getOperand(); - } - - // Look for the loop invariant symbolic value. - const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); - if (!U) - return nullptr; - - Value *Stride = U->getValue(); - if (!Lp->isLoopInvariant(Stride)) - return nullptr; - - // If we have stripped off the recurrence cast we have to make sure that we - // return the value that is used in this loop so that we can replace it later. 
- if (StripedOffRecurrenceCast) - Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); - - return Stride; -} - void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { Value *Ptr = nullptr; if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess)) @@ -4585,7 +4450,7 @@ LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { // Width 1 means no vectorize VectorizationFactor Factor = { 1U, 0U }; - if (OptForSize && Legal->getRuntimePointerCheck()->Need) { + if (OptForSize && Legal->getRuntimePointerChecking()->Need) { emitAnalysis(VectorizationReport() << "runtime pointer checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " @@ -4745,41 +4610,40 @@ unsigned LoopVectorizationCostModel::getWidestType() { return MaxWidth; } -unsigned -LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, - unsigned VF, - unsigned LoopCost) { +unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, + unsigned VF, + unsigned LoopCost) { - // -- The unroll heuristics -- - // We unroll the loop in order to expose ILP and reduce the loop overhead. + // -- The interleave heuristics -- + // We interleave the loop in order to expose ILP and reduce the loop overhead. // There are many micro-architectural considerations that we can't predict // at this level. For example, frontend pressure (on decode or fetch) due to // code size, or the number and capabilities of the execution ports. // - // We use the following heuristics to select the unroll factor: - // 1. If the code has reductions, then we unroll in order to break the cross + // We use the following heuristics to select the interleave count: + // 1. If the code has reductions, then we interleave to break the cross // iteration dependency. - // 2. If the loop is really small, then we unroll in order to reduce the loop + // 2. If the loop is really small, then we interleave to reduce the loop // overhead. - // 3. We don't unroll if we think that we will spill registers to memory due - // to the increased register pressure. + // 3. We don't interleave if we think that we will spill registers to memory + // due to the increased register pressure. // Use the user preference, unless 'auto' is selected. int UserUF = Hints->getInterleave(); if (UserUF != 0) return UserUF; - // When we optimize for size, we don't unroll. + // When we optimize for size, we don't interleave. if (OptForSize) return 1; - // We used the distance for the unroll factor. + // We used the distance for the interleave count. if (Legal->getMaxSafeDepDistBytes() != -1U) return 1; - // Do not unroll loops with a relatively small trip count. + // Do not interleave loops with a relatively small trip count. unsigned TC = SE->getSmallConstantTripCount(TheLoop); - if (TC > 1 && TC < TinyTripCountUnrollThreshold) + if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); @@ -4800,32 +4664,32 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); R.NumInstructions = std::max(R.NumInstructions, 1U); - // We calculate the unroll factor using the following formula. + // We calculate the interleave count using the following formula. // Subtract the number of loop invariants from the number of available - // registers. These registers are used by all of the unrolled instances. + // registers. 
These registers are used by all of the interleaved instances. // Next, divide the remaining registers by the number of registers that is // required by the loop, in order to estimate how many parallel instances // fit without causing spills. All of this is rounded down if necessary to be - // a power of two. We want power of two unroll factors to simplify any + // a power of two. We want power of two interleave count to simplify any // addressing operations or alignment considerations. - unsigned UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / + unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers); - // Don't count the induction variable as unrolled. + // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) - UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / + IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / std::max(1U, (R.MaxLocalUsers - 1))); - // Clamp the unroll factor ranges to reasonable factors. - unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(VF); + // Clamp the interleave ranges to reasonable counts. + unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); - // Check if the user has overridden the unroll max. + // Check if the user has overridden the max. if (VF == 1) { if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0) - MaxInterleaveSize = ForceTargetMaxScalarInterleaveFactor; + MaxInterleaveCount = ForceTargetMaxScalarInterleaveFactor; } else { if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0) - MaxInterleaveSize = ForceTargetMaxVectorInterleaveFactor; + MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } // If we did not calculate the cost for VF (because the user selected the VF) @@ -4833,72 +4697,74 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, if (LoopCost == 0) LoopCost = expectedCost(VF); - // Clamp the calculated UF to be between the 1 and the max unroll factor + // Clamp the calculated IC to be between the 1 and the max interleave count // that the target allows. - if (UF > MaxInterleaveSize) - UF = MaxInterleaveSize; - else if (UF < 1) - UF = 1; + if (IC > MaxInterleaveCount) + IC = MaxInterleaveCount; + else if (IC < 1) + IC = 1; - // Unroll if we vectorized this loop and there is a reduction that could - // benefit from unrolling. + // Interleave if we vectorized this loop and there is a reduction that could + // benefit from interleaving. if (VF > 1 && Legal->getReductionVars()->size()) { - DEBUG(dbgs() << "LV: Unrolling because of reductions.\n"); - return UF; + DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); + return IC; } // Note that if we've already vectorized the loop we will have done the - // runtime check and so unrolling won't require further checks. - bool UnrollingRequiresRuntimePointerCheck = - (VF == 1 && Legal->getRuntimePointerCheck()->Need); + // runtime check and so interleaving won't require further checks. + bool InterleavingRequiresRuntimePointerCheck = + (VF == 1 && Legal->getRuntimePointerChecking()->Need); - // We want to unroll small loops in order to reduce the loop overhead and + // We want to interleave small loops in order to reduce the loop overhead and // potentially expose ILP opportunities. 
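To make the register-pressure formula above concrete (the numbers are made up for illustration, not taken from any real target): with 16 available registers, 2 loop-invariant values and a peak per-iteration pressure of 4 registers, the base formula gives PowerOf2Floor((16 - 2) / 4) = PowerOf2Floor(3) = 2, while with the induction-variable heuristic enabled the induction variable is discounted from both the remaining registers and the per-iteration pressure, giving PowerOf2Floor((16 - 2 - 1) / 3) = PowerOf2Floor(4) = 4. The computed value is then clamped to the range [1, TTI.getMaxInterleaveFactor(VF)] before the small-loop and load/store port-saturation adjustments that follow below.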
DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'); - if (!UnrollingRequiresRuntimePointerCheck && - LoopCost < SmallLoopCost) { + if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) { // We assume that the cost overhead is 1 and we use the cost model - // to estimate the cost of the loop and unroll until the cost of the + // to estimate the cost of the loop and interleave until the cost of the // loop overhead is about 5% of the cost of the loop. - unsigned SmallUF = std::min(UF, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost)); + unsigned SmallIC = + std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost)); - // Unroll until store/load ports (estimated by max unroll factor) are + // Interleave until store/load ports (estimated by max interleave count) are // saturated. unsigned NumStores = Legal->getNumStores(); unsigned NumLoads = Legal->getNumLoads(); - unsigned StoresUF = UF / (NumStores ? NumStores : 1); - unsigned LoadsUF = UF / (NumLoads ? NumLoads : 1); + unsigned StoresIC = IC / (NumStores ? NumStores : 1); + unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1); // If we have a scalar reduction (vector reductions are already dealt with // by this point), we can increase the critical path length if the loop - // we're unrolling is inside another loop. Limit, by default to 2, so the + // we're interleaving is inside another loop. Limit, by default to 2, so the // critical path only gets increased by one reduction operation. if (Legal->getReductionVars()->size() && TheLoop->getLoopDepth() > 1) { - unsigned F = static_cast<unsigned>(MaxNestedScalarReductionUF); - SmallUF = std::min(SmallUF, F); - StoresUF = std::min(StoresUF, F); - LoadsUF = std::min(LoadsUF, F); + unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC); + SmallIC = std::min(SmallIC, F); + StoresIC = std::min(StoresIC, F); + LoadsIC = std::min(LoadsIC, F); } - if (EnableLoadStoreRuntimeUnroll && std::max(StoresUF, LoadsUF) > SmallUF) { - DEBUG(dbgs() << "LV: Unrolling to saturate store or load ports.\n"); - return std::max(StoresUF, LoadsUF); + if (EnableLoadStoreRuntimeInterleave && + std::max(StoresIC, LoadsIC) > SmallIC) { + DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n"); + return std::max(StoresIC, LoadsIC); } - DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n"); - return SmallUF; + DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n"); + return SmallIC; } - // Unroll if this is a large loop (small loops are already dealt with by this - // point) that could benefit from interleaved unrolling. + // Interleave if this is a large loop (small loops are already dealt with by + // this + // point) that could benefit from interleaving. 
bool HasReductions = (Legal->getReductionVars()->size() > 0); if (TTI.enableAggressiveInterleaving(HasReductions)) { - DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n"); - return UF; + DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); + return IC; } - DEBUG(dbgs() << "LV: Not Unrolling.\n"); + DEBUG(dbgs() << "LV: Not Interleaving.\n"); return 1; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7c4c279dcf4d..7bac407e77e9 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -69,8 +69,13 @@ static cl::opt<bool> ShouldStartVectorizeHorAtStore( cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")); +static cl::opt<int> +MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, + cl::desc("Attempt to vectorize for this register size in bits")); + namespace { +// FIXME: Set this via cl::opt to allow overriding. static const unsigned MinVecRegSize = 128; static const unsigned RecursionMaxDepth = 12; @@ -2136,9 +2141,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } // Prepare the operand vector. - for (unsigned j = 0; j < E->Scalars.size(); ++j) - Operands.push_back(cast<PHINode>(E->Scalars[j])-> - getIncomingValueForBlock(IBB)); + for (Value *V : E->Scalars) + Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB)); Builder.SetInsertPoint(IBB->getTerminator()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); @@ -2172,8 +2176,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FPTrunc: case Instruction::BitCast: { ValueList INVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); + for (Value *V : E->Scalars) + INVL.push_back(cast<Instruction>(V)->getOperand(0)); setInsertPointAfterBundle(E->Scalars); @@ -2191,9 +2195,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FCmp: case Instruction::ICmp: { ValueList LHSV, RHSV; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); - RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); + for (Value *V : E->Scalars) { + LHSV.push_back(cast<Instruction>(V)->getOperand(0)); + RHSV.push_back(cast<Instruction>(V)->getOperand(1)); } setInsertPointAfterBundle(E->Scalars); @@ -2217,10 +2221,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } case Instruction::Select: { ValueList TrueVec, FalseVec, CondVec; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); - TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); - FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2)); + for (Value *V : E->Scalars) { + CondVec.push_back(cast<Instruction>(V)->getOperand(0)); + TrueVec.push_back(cast<Instruction>(V)->getOperand(1)); + FalseVec.push_back(cast<Instruction>(V)->getOperand(2)); } setInsertPointAfterBundle(E->Scalars); @@ -2259,9 +2263,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL); else - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); - RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); + for (Value *V : E->Scalars) { + LHSVL.push_back(cast<Instruction>(V)->getOperand(0)); + 
RHSVL.push_back(cast<Instruction>(V)->getOperand(1)); } setInsertPointAfterBundle(E->Scalars); @@ -2322,8 +2326,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned AS = SI->getPointerAddressSpace(); ValueList ValueOp; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand()); + for (Value *V : E->Scalars) + ValueOp.push_back(cast<StoreInst>(V)->getValueOperand()); setInsertPointAfterBundle(E->Scalars); @@ -2351,8 +2355,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); ValueList Op0VL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0)); + for (Value *V : E->Scalars) + Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0)); Value *Op0 = vectorizeTree(Op0VL); @@ -2360,8 +2364,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e; ++j) { ValueList OpVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j)); + for (Value *V : E->Scalars) + OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j)); Value *OpVec = vectorizeTree(OpVL); OpVecs.push_back(OpVec); @@ -2397,8 +2401,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { OpVecs.push_back(CEI->getArgOperand(j)); continue; } - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - CallInst *CEI = cast<CallInst>(E->Scalars[i]); + for (Value *V : E->Scalars) { + CallInst *CEI = cast<CallInst>(V); OpVL.push_back(CEI->getArgOperand(j)); } @@ -3089,6 +3093,17 @@ struct SLPVectorizer : public FunctionPass { if (!TTI->getNumberOfRegisters(true)) return false; + // Use the vector register size specified by the target unless overridden + // by a command-line option. + // TODO: It would be better to limit the vectorization factor based on + // data type rather than just register size. For example, x86 AVX has + // 256-bit registers, but it does not support integer operations + // at that width (that requires AVX2). + if (MaxVectorRegSizeOption.getNumOccurrences()) + MaxVecRegSize = MaxVectorRegSizeOption; + else + MaxVecRegSize = TTI->getRegisterBitWidth(true); + // Don't vectorize when the attribute NoImplicitFloat is used. if (F.hasFnAttribute(Attribute::NoImplicitFloat)) return false; @@ -3166,12 +3181,13 @@ private: bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold, - BoUpSLP &R); + BoUpSLP &R, unsigned VecRegSize); bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold, BoUpSLP &R); private: StoreListMap StoreRefs; + unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. 
}; /// \brief Check that the Values in the slice in VL array are still existent in @@ -3186,14 +3202,15 @@ static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH, } bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, - int CostThreshold, BoUpSLP &R) { + int CostThreshold, BoUpSLP &R, + unsigned VecRegSize) { unsigned ChainLen = Chain.size(); DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen << "\n"); Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout(); unsigned Sz = DL.getTypeSizeInBits(StoreTy); - unsigned VF = MinVecRegSize / Sz; + unsigned VF = VecRegSize / Sz; if (!isPowerOf2_32(Sz) || VF < 2) return false; @@ -3277,12 +3294,16 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, I = ConsecutiveChain[I]; } - bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R); - - // Mark the vectorized stores so that we don't vectorize them again. - if (Vectorized) - VectorizedStores.insert(Operands.begin(), Operands.end()); - Changed |= Vectorized; + // FIXME: Is division-by-2 the correct step? Should we assert that the + // register size is a power-of-2? + for (unsigned Size = MaxVecRegSize; Size >= MinVecRegSize; Size /= 2) { + if (vectorizeStoreChain(Operands, costThreshold, R, Size)) { + // Mark the vectorized stores so that we don't vectorize them again. + VectorizedStores.insert(Operands.begin(), Operands.end()); + Changed = true; + break; + } + } } return Changed; @@ -3293,8 +3314,8 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { unsigned count = 0; StoreRefs.clear(); const DataLayout &DL = BB->getModule()->getDataLayout(); - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - StoreInst *SI = dyn_cast<StoreInst>(it); + for (Instruction &I : *BB) { + StoreInst *SI = dyn_cast<StoreInst>(&I); if (!SI) continue; @@ -3342,13 +3363,15 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, Type *Ty0 = I0->getType(); unsigned Sz = DL.getTypeSizeInBits(Ty0); + // FIXME: Register size should be a parameter to this function, so we can + // try different vectorization factors. unsigned VF = MinVecRegSize / Sz; - for (int i = 0, e = VL.size(); i < e; ++i) { - Type *Ty = VL[i]->getType(); + for (Value *V : VL) { + Type *Ty = V->getType(); if (!isValidElementType(Ty)) return false; - Instruction *Inst = dyn_cast<Instruction>(VL[i]); + Instruction *Inst = dyn_cast<Instruction>(V); if (!Inst || Inst->getOpcode() != Opcode0) return false; } @@ -3571,6 +3594,8 @@ public: const DataLayout &DL = B->getModule()->getDataLayout(); ReductionOpcode = B->getOpcode(); ReducedValueOpcode = 0; + // FIXME: Register size should be a parameter to this function, so we can + // try different vectorization factors. ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty); ReductionRoot = B; ReductionPHI = Phi; @@ -3997,6 +4022,9 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { << it->second.size() << ".\n"); // Process the stores in chunks of 16. + // TODO: The limit of 16 inhibits greater vectorization factors. + // For example, AVX2 supports v32i8. Increasing this limit, however, + // may cause a significant compile-time increase. for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) { unsigned Len = std::min<unsigned>(CE - CI, 16); Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), |
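A brief usage note, separate from the diff itself: the new slp-max-reg-size option lets the SLP vectorizer's assumed register width be overridden from the command line, for example with an invocation along the lines of opt -S -slp-vectorizer -slp-max-reg-size=256 input.ll -o output.ll (the surrounding pass pipeline in this example is illustrative, not mandated by the patch); when the flag is not given, the width falls back to TTI->getRegisterBitWidth(true) as shown above.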