author     Dimitry Andric <dim@FreeBSD.org>  2015-08-07 23:01:33 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2015-08-07 23:01:33 +0000
commit     ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch)
tree       52d1861acda1205241ee35a94aa63129c604d469 /lib/Analysis
parent     1a82d4c088707c791c792f6822f611b47a12bdfe (diff)
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp       |   5
-rw-r--r--  lib/Analysis/AliasDebugger.cpp       |   4
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp     |   3
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp  |   6
-rw-r--r--  lib/Analysis/ConstantFolding.cpp     |   6
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp   | 320
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp      |   6
-rw-r--r--  lib/Analysis/IVUsers.cpp             |  17
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 114
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp  | 447
-rw-r--r--  lib/Analysis/NoAliasAnalysis.cpp     |   1
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp |   6
-rw-r--r--  lib/Analysis/ValueTracking.cpp       |   2
-rw-r--r--  lib/Analysis/VectorUtils.cpp         | 198
14 files changed, 832 insertions, 303 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index ad0727a0e0e5..44d137dffd22 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -71,11 +71,6 @@ void AliasAnalysis::deleteValue(Value *V) { AA->deleteValue(V); } -void AliasAnalysis::copyValue(Value *From, Value *To) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - AA->copyValue(From, To); -} - void AliasAnalysis::addEscapingUse(Use &U) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); AA->addEscapingUse(U); diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index 1ef49fc02fef..e5107b3bc827 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -124,10 +124,6 @@ namespace { assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); AliasAnalysis::deleteValue(V); } - void copyValue(Value *From, Value *To) override { - Vals.insert(To); - AliasAnalysis::copyValue(From, To); - } }; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index bf8cda1ffaec..54d0f4304e1f 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -544,9 +544,6 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { // the tracker already knows about a value, it will ignore the request. // void AliasSetTracker::copyValue(Value *From, Value *To) { - // Notify the alias analysis implementation that this value is copied. - AA.copyValue(From, To); - // First, look up the PointerRec for this pointer. PointerMapType::iterator I = PointerMap.find_as(From); if (I == PointerMap.end()) diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 8e812252fdfe..68f766edb301 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -685,6 +685,9 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { if (CS.onlyReadsMemory()) Min = OnlyReadsMemory; + if (CS.onlyAccessesArgMemory()) + Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + // The AliasAnalysis base class has some smarts, lets use them. 
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); } @@ -710,6 +713,9 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { if (F->onlyReadsMemory()) Min = OnlyReadsMemory; + if (F->onlyAccessesArgMemory()) + Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (isMemsetPattern16(F, TLI)) diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 2f4c6a92f9af..02a5aef03223 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1234,6 +1234,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::bswap: @@ -1450,6 +1452,10 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFoldFP(floor, V, Ty); case Intrinsic::ceil: return ConstantFoldFP(ceil, V, Ty); + case Intrinsic::sin: + return ConstantFoldFP(sin, V, Ty); + case Intrinsic::cos: + return ConstantFoldFP(cos, V, Ty); } if (!TLI) diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f1ddde252924..18d45dd6a396 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -42,94 +42,111 @@ STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); namespace { - /// FunctionRecord - One instance of this structure is stored for every - /// function in the program. Later, the entries for these functions are - /// removed if the function is found to call an external function (in which - /// case we know nothing about it. - struct FunctionRecord { - /// GlobalInfo - Maintain mod/ref info for all of the globals without - /// addresses taken that are read or written (transitively) by this - /// function. - std::map<const GlobalValue*, unsigned> GlobalInfo; - - /// MayReadAnyGlobal - May read global variables, but it is not known which. - bool MayReadAnyGlobal; - - unsigned getInfoForGlobal(const GlobalValue *GV) const { - unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; - std::map<const GlobalValue*, unsigned>::const_iterator I = +/// FunctionRecord - One instance of this structure is stored for every +/// function in the program. Later, the entries for these functions are +/// removed if the function is found to call an external function (in which +/// case we know nothing about it. +struct FunctionRecord { + /// GlobalInfo - Maintain mod/ref info for all of the globals without + /// addresses taken that are read or written (transitively) by this + /// function. + std::map<const GlobalValue *, unsigned> GlobalInfo; + + /// MayReadAnyGlobal - May read global variables, but it is not known which. + bool MayReadAnyGlobal; + + unsigned getInfoForGlobal(const GlobalValue *GV) const { + unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; + std::map<const GlobalValue *, unsigned>::const_iterator I = GlobalInfo.find(GV); - if (I != GlobalInfo.end()) - Effect |= I->second; - return Effect; - } + if (I != GlobalInfo.end()) + Effect |= I->second; + return Effect; + } - /// FunctionEffect - Capture whether or not this function reads or writes to - /// ANY memory. If not, we can do a lot of aggressive analysis on it. 
- unsigned FunctionEffect; + /// FunctionEffect - Capture whether or not this function reads or writes to + /// ANY memory. If not, we can do a lot of aggressive analysis on it. + unsigned FunctionEffect; - FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {} - }; + FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {} +}; - /// GlobalsModRef - The actual analysis pass. - class GlobalsModRef : public ModulePass, public AliasAnalysis { - /// NonAddressTakenGlobals - The globals that do not have their addresses - /// taken. - std::set<const GlobalValue*> NonAddressTakenGlobals; +/// GlobalsModRef - The actual analysis pass. +class GlobalsModRef : public ModulePass, public AliasAnalysis { + /// NonAddressTakenGlobals - The globals that do not have their addresses + /// taken. + std::set<const GlobalValue *> NonAddressTakenGlobals; - /// IndirectGlobals - The memory pointed to by this global is known to be - /// 'owned' by the global. - std::set<const GlobalValue*> IndirectGlobals; + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + std::set<const GlobalValue *> IndirectGlobals; - /// AllocsForIndirectGlobals - If an instruction allocates memory for an - /// indirect global, this map indicates which one. - std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals; - /// FunctionInfo - For each function, keep track of what globals are - /// modified or read. - std::map<const Function*, FunctionRecord> FunctionInfo; + /// FunctionInfo - For each function, keep track of what globals are + /// modified or read. + std::map<const Function *, FunctionRecord> FunctionInfo; - public: - static char ID; - GlobalsModRef() : ModulePass(ID) { - initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); - } +public: + static char ID; + GlobalsModRef() : ModulePass(ID) { + initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); + } - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); + bool runOnModule(Module &M) override { + InitializeAliasAnalysis(this, &M.getDataLayout()); - // Find non-addr taken globals. - AnalyzeGlobals(M); + // Find non-addr taken globals. + AnalyzeGlobals(M); - // Propagate on CG. - AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); - return false; - } + // Propagate on CG. 
+ AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); + return false; + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<CallGraphWrapperPass>(); - AU.setPreservesAll(); // Does not transform code - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<CallGraphWrapperPass>(); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override; + ModRefResult getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) override; + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { + return AliasAnalysis::getModRefInfo(CS1, CS2); + } - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1, CS2); + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(const Function *F) override { + ModRefBehavior Min = UnknownModRefBehavior; + + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; } - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(const Function *F) override { - ModRefBehavior Min = UnknownModRefBehavior; + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { + ModRefBehavior Min = UnknownModRefBehavior; + if (const Function *F = CS.getCalledFunction()) if (FunctionRecord *FR = getFunctionInfo(F)) { if (FR->FunctionEffect == 0) Min = DoesNotAccessMemory; @@ -137,68 +154,50 @@ namespace { Min = OnlyReadsMemory; } - return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. 
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (const Function* F = CS.getCalledFunction()) - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + } - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); - } + void deleteValue(Value *V) override; + void addEscapingUse(Use &U) override; + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + void *getAdjustedAnalysisPointer(AnalysisID PI) override { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis *)this; + return this; + } - void deleteValue(Value *V) override; - void copyValue(Value *From, Value *To) override; - void addEscapingUse(Use &U) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - /// getFunctionInfo - Return the function info for the function, or null if - /// we don't have anything useful to say about it. - FunctionRecord *getFunctionInfo(const Function *F) { - std::map<const Function*, FunctionRecord>::iterator I = +private: + /// getFunctionInfo - Return the function info for the function, or null if + /// we don't have anything useful to say about it. 
+ FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function *, FunctionRecord>::iterator I = FunctionInfo.find(F); - if (I != FunctionInfo.end()) - return &I->second; - return nullptr; - } + if (I != FunctionInfo.end()) + return &I->second; + return nullptr; + } - void AnalyzeGlobals(Module &M); - void AnalyzeCallGraph(CallGraph &CG, Module &M); - bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, - std::vector<Function*> &Writers, - GlobalValue *OkayStoreDest = nullptr); - bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); - }; + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers, + std::vector<Function *> &Writers, + GlobalValue *OkayStoreDest = nullptr); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); +}; } char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, - "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false) +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", + "Simple mod/ref analysis for globals", false, true, + false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, - "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", + "Simple mod/ref analysis for globals", false, true, + false) Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } @@ -207,7 +206,7 @@ Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } /// (really, their address passed to something nontrivial), record this fact, /// and record the functions that they are used directly in. void GlobalsModRef::AnalyzeGlobals(Module &M) { - std::vector<Function*> Readers, Writers; + std::vector<Function *> Readers, Writers; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { @@ -215,11 +214,12 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { NonAddressTakenGlobals.insert(I); ++NumNonAddrTakenFunctions; } - Readers.clear(); Writers.clear(); + Readers.clear(); + Writers.clear(); } - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { // Remember that we are tracking this global, and the mod/ref fns @@ -228,7 +228,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { for (unsigned i = 0, e = Readers.size(); i != e; ++i) FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - if (!I->isConstant()) // No need to keep track of writers to constants + if (!I->isConstant()) // No need to keep track of writers to constants for (unsigned i = 0, e = Writers.size(); i != e; ++i) FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; ++NumNonAddrTakenGlobalVars; @@ -238,7 +238,8 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { AnalyzeIndirectGlobalMemory(I)) ++NumIndirectGlobalVars; } - Readers.clear(); Writers.clear(); + Readers.clear(); + Writers.clear(); } } @@ -249,10 +250,11 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { /// /// If OkayStoreDest is non-null, stores into this global are allowed. 
bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, - std::vector<Function*> &Readers, - std::vector<Function*> &Writers, + std::vector<Function *> &Readers, + std::vector<Function *> &Writers, GlobalValue *OkayStoreDest) { - if (!V->getType()->isPointerTy()) return true; + if (!V->getType()->isPointerTy()) + return true; for (Use &U : V->uses()) { User *I = U.getUser(); @@ -262,7 +264,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, if (V == SI->getOperand(1)) { Writers.push_back(SI->getParent()->getParent()); } else if (SI->getOperand(1) != OkayStoreDest) { - return true; // Storing the pointer + return true; // Storing the pointer } } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { if (AnalyzeUsesOfPointer(I, Readers, Writers)) @@ -282,7 +284,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) - return true; // Allow comparison against null. + return true; // Allow comparison against null. } else { return true; } @@ -301,7 +303,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Keep track of values related to the allocation of the memory, f.e. the // value produced by the malloc call and any casts. - std::vector<Value*> AllocRelatedValues; + std::vector<Value *> AllocRelatedValues; // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. @@ -310,13 +312,14 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // The pointer loaded from the global can only be used in simple ways: // we allow addressing of it and loading storing to it. We do *not* allow // storing the loaded pointer somewhere else or passing to a function. - std::vector<Function*> ReadersWriters; + std::vector<Function *> ReadersWriters; if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) - return false; // Loaded pointer escapes. + return false; // Loaded pointer escapes. // TODO: Could try some IP mod/ref of the loaded pointer. } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // Storing the global itself. - if (SI->getOperand(0) == GV) return false; + if (SI->getOperand(0) == GV) + return false; // If storing the null pointer, ignore it. if (isa<ConstantPointerNull>(SI->getOperand(0))) @@ -327,13 +330,13 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { GV->getParent()->getDataLayout()); if (!isAllocLikeFn(Ptr, TLI)) - return false; // Too hard to analyze. + return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a // non-simple way (e.g. stored to another global) bail out. - std::vector<Function*> ReadersWriters; + std::vector<Function *> ReadersWriters; if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) - return false; // Loaded pointer escapes. + return false; // Loaded pointer escapes. // Remember that this allocation is related to the indirect global. AllocRelatedValues.push_back(Ptr); @@ -360,7 +363,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). 
- for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { const std::vector<CallGraphNode *> &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); @@ -437,9 +440,10 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } // Scan the function bodies for explicit loads or stores. - for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef; + ++i) for (inst_iterator II = inst_begin(SCC[i]->getFunction()), - E = inst_end(SCC[i]->getFunction()); + E = inst_end(SCC[i]->getFunction()); II != E && FunctionEffect != ModRef; ++II) if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) { FunctionEffect |= Ref; @@ -474,8 +478,6 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } } - - /// alias - If one of the pointers is to a global that we are tracking, and the /// other is some random pointer, we know there cannot be an alias, because the /// address of the global isn't taken. @@ -492,8 +494,10 @@ AliasResult GlobalsModRef::alias(const MemoryLocation &LocA, if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr; + if (GV1 && !NonAddressTakenGlobals.count(GV1)) + GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) + GV2 = nullptr; // If the two pointers are derived from two different non-addr-taken // globals, or if one is and the other isn't, we know these can't alias. @@ -554,7 +558,6 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); } - //===----------------------------------------------------------------------===// // Methods to update the analysis as a result of the client transformation. // @@ -565,9 +568,10 @@ void GlobalsModRef::deleteValue(Value *V) { // any AllocRelatedValues for it. if (IndirectGlobals.erase(GV)) { // Remove any entries in AllocsForIndirectGlobals for this global. - for (std::map<const Value*, const GlobalValue*>::iterator - I = AllocsForIndirectGlobals.begin(), - E = AllocsForIndirectGlobals.end(); I != E; ) { + for (std::map<const Value *, const GlobalValue *>::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); + I != E;) { if (I->second == GV) { AllocsForIndirectGlobals.erase(I++); } else { @@ -585,16 +589,12 @@ void GlobalsModRef::deleteValue(Value *V) { AliasAnalysis::deleteValue(V); } -void GlobalsModRef::copyValue(Value *From, Value *To) { - AliasAnalysis::copyValue(From, To); -} - void GlobalsModRef::addEscapingUse(Use &U) { // For the purposes of this analysis, it is conservatively correct to treat // a newly escaping value equivalently to a deleted one. We could perhaps // be more precise by processing the new use and attempting to update our // saved analysis results to accommodate it. deleteValue(U); - + AliasAnalysis::addEscapingUse(U); } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 349b9cac2c2d..c0d2e375cb04 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. 
return false; - case Intrinsic::frameescape: + case Intrinsic::localescape: HasFrameEscape = true; return false; } @@ -1424,11 +1424,11 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; - // Disallow inlining functions that call @llvm.frameescape. Doing this + // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. if (CS.getCalledFunction() && CS.getCalledFunction()->getIntrinsicID() == - llvm::Intrinsic::frameescape) + llvm::Intrinsic::localescape) return false; } } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index b88b2496b875..926787d3be91 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -12,8 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -34,6 +36,7 @@ using namespace llvm; char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) @@ -137,6 +140,11 @@ bool IVUsers::AddUsersImpl(Instruction *I, if (Width > 64 || !DL.isLegalInteger(Width)) return false; + // Don't attempt to promote ephemeral values to indvars. They will be removed + // later anyway. + if (EphValues.count(I)) + return false; + // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); @@ -244,6 +252,7 @@ IVUsers::IVUsers() } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); @@ -253,10 +262,16 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { L = l; + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( + *L->getHeader()->getParent()); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); + // Collect ephemeral values so that AddUsersIfInteresting skips them. + EphValues.clear(); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. 
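The IVUsers change above wires in AssumptionCacheTracker so that ephemeral values (values that exist only to feed @llvm.assume and are removed later anyway) are never promoted to induction-variable users. Below is a minimal sketch of that pattern, assuming a loop `L` and assumption cache `AC` supplied by a surrounding pass; the helper name is hypothetical, not part of the patch:

```cpp
// Hedged sketch (not the pass itself): skipping ephemeral values the way
// IVUsers::AddUsersImpl now does. `L` and `AC` are assumed to come from
// the enclosing pass's runOnLoop.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static bool worthPromotingToIV(Instruction *I, Loop *L, AssumptionCache *AC) {
  // Collect the values that only feed @llvm.assume in this loop.
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, AC, EphValues);
  // Ephemeral values will be deleted once the assumption is consumed,
  // so treating them as IV users would be wasted work.
  return !EphValues.count(I);
}
```

In the actual patch the set is collected once per loop in runOnLoop and consulted in AddUsersImpl, which is cheaper than recollecting per instruction as this sketch does.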
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 12e406bb1a2d..fa42b48b6cdb 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -3046,7 +3047,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + FastMathFlags FMF, const Query &Q, + unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); @@ -3065,6 +3067,14 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(GetCompareTy(LHS), 1); + // UNO/ORD predicates can be trivially folded if NaNs are ignored. + if (FMF.noNaNs()) { + if (Pred == FCmpInst::FCMP_UNO) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_ORD) + return ConstantInt::get(GetCompareTy(LHS), 1); + } + // fcmp pred x, undef and fcmp pred undef, x // fold to true if unordered, false if ordered if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { @@ -3151,12 +3161,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, + FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is @@ -3511,6 +3521,82 @@ Value *llvm::SimplifyInsertValueInst( RecursionLimit); } +/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we +/// can fold the result. If not, this returns null. 
+static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const Query &, unsigned) { + if (auto *CAgg = dyn_cast<Constant>(Agg)) + return ConstantFoldExtractValueInstruction(CAgg, Idxs); + + // extractvalue x, (insertvalue y, elt, n), n -> elt + unsigned NumIdxs = Idxs.size(); + for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr; + IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) { + ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices(); + unsigned NumInsertValueIdxs = InsertValueIdxs.size(); + unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs); + if (InsertValueIdxs.slice(0, NumCommonIdxs) == + Idxs.slice(0, NumCommonIdxs)) { + if (NumIdxs == NumInsertValueIdxs) + return IVI->getInsertedValueOperand(); + break; + } + } + + return nullptr; +} + +Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, + AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + +/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we +/// can fold the result. If not, this returns null. +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, + unsigned) { + if (auto *CVec = dyn_cast<Constant>(Vec)) { + if (auto *CIdx = dyn_cast<Constant>(Idx)) + return ConstantFoldExtractElementInstruction(CVec, CIdx); + + // The index is not relevant if our vector is a splat. + if (auto *Splat = CVec->getSplatValue()) + return Splat; + + if (isa<UndefValue>(Vec)) + return UndefValue::get(Vec->getType()->getVectorElementType()); + } + + // If extracting a specified index from the vector, see if we can recursively + // find a previously computed scalar that was inserted into the vector. + if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) { + unsigned IndexVal = IdxC->getZExtValue(); + unsigned VectorWidth = Vec->getType()->getVectorNumElements(); + + // If this is extracting an invalid index, turn this into undef, to avoid + // crashing the code below. + if (IndexVal >= VectorWidth) + return UndefValue::get(Vec->getType()->getVectorElementType()); + + if (Value *Elt = findScalarElement(Vec, IndexVal)) + return Elt; + } + + return nullptr; +} + +Value *llvm::SimplifyExtractElementInst( + Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. 
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node @@ -3670,7 +3756,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -3900,9 +3986,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::FCmp: - Result = - SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), @@ -3920,6 +4006,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, IV->getIndices(), DL, TLI, DT, AC, I); break; } + case Instruction::ExtractValue: { + auto *EVI = cast<ExtractValueInst>(I); + Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), + EVI->getIndices(), DL, TLI, DT, AC, I); + break; + } + case Instruction::ExtractElement: { + auto *EEI = cast<ExtractElementInst>(I); + Result = SimplifyExtractElementInst( + EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); + break; + } case Instruction::PHI: Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I)); break; diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b11cd7e84a6d..becbae4c5b50 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); unsigned VectorizerParams::RuntimeMemoryCheckThreshold; +/// \brief The maximum iterations used to merge memory checks +static cl::opt<unsigned> MemoryCheckMergeThreshold( + "memory-check-merge-threshold", cl::Hidden, + cl::desc("Maximum number of comparisons done when trying to merge " + "runtime memory checks. (default = 100)"), + cl::init(100)); + /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; @@ -112,35 +119,182 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, return SE->getSCEV(Ptr); } -void LoopAccessInfo::RuntimePointerCheck::insert( - ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, - unsigned ASId, const ValueToValueMap &Strides) { +void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, + unsigned DepSetId, unsigned ASId, + const ValueToValueMap &Strides) { // Get the stride replaced scev. 
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.push_back(Ptr); - Starts.push_back(AR->getStart()); - Ends.push_back(ScEnd); - IsWritePtr.push_back(WritePtr); - DependencySetId.push_back(DepSetId); - AliasSetId.push_back(ASId); + Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId, + Sc); +} + +bool RuntimePointerChecking::needsChecking( + const CheckingPtrGroup &M, const CheckingPtrGroup &N, + const SmallVectorImpl<int> *PtrPartition) const { + for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) + for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) + if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) + return true; + return false; +} + +/// Compare \p I and \p J and return the minimum. +/// Return nullptr in case we couldn't find an answer. +static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, + ScalarEvolution *SE) { + const SCEV *Diff = SE->getMinusSCEV(J, I); + const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff); + + if (!C) + return nullptr; + if (C->getValue()->isNegative()) + return J; + return I; +} + +bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { + const SCEV *Start = RtCheck.Pointers[Index].Start; + const SCEV *End = RtCheck.Pointers[Index].End; + + // Compare the starts and ends with the known minimum and maximum + // of this set. We need to know how we compare against the min/max + // of the set in order to be able to emit memchecks. + const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); + if (!Min0) + return false; + + const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); + if (!Min1) + return false; + + // Update the low bound expression if we've found a new min value. + if (Min0 == Start) + Low = Start; + + // Update the high bound expression if we've found a new max value. + if (Min1 != End) + High = End; + + Members.push_back(Index); + return true; } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( +void RuntimePointerChecking::groupChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { + // We build the groups from dependency candidates equivalence classes + // because: + // - We know that pointers in the same equivalence class share + // the same underlying object and therefore there is a chance + // that we can compare pointers + // - We wouldn't be able to merge two pointers for which we need + // to emit a memcheck. The classes in DepCands are already + // conveniently built such that no two pointers in the same + // class need checking against each other. + + // We use the following (greedy) algorithm to construct the groups + // For every pointer in the equivalence class: + // For each existing group: + // - if the difference between this pointer and the min/max bounds + // of the group is a constant, then make the pointer part of the + // group and update the min/max bounds of that group as required. + + CheckingGroups.clear(); + + // If we don't have the dependency partitions, construct a new + // checking pointer group for each pointer. 
+ if (!UseDependencies) { + for (unsigned I = 0; I < Pointers.size(); ++I) + CheckingGroups.push_back(CheckingPtrGroup(I, *this)); + return; + } + + unsigned TotalComparisons = 0; + + DenseMap<Value *, unsigned> PositionMap; + for (unsigned Index = 0; Index < Pointers.size(); ++Index) + PositionMap[Pointers[Index].PointerValue] = Index; + + // We need to keep track of what pointers we've already seen so we + // don't process them twice. + SmallSet<unsigned, 2> Seen; + + // Go through all equivalence classes, get the the "pointer check groups" + // and add them to the overall solution. We use the order in which accesses + // appear in 'Pointers' to enforce determinism. + for (unsigned I = 0; I < Pointers.size(); ++I) { + // We've seen this pointer before, and therefore already processed + // its equivalence class. + if (Seen.count(I)) + continue; + + MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, + Pointers[I].IsWritePtr); + + SmallVector<CheckingPtrGroup, 2> Groups; + auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); + + // Because DepCands is constructed by visiting accesses in the order in + // which they appear in alias sets (which is deterministic) and the + // iteration order within an equivalence class member is only dependent on + // the order in which unions and insertions are performed on the + // equivalence class, the iteration order is deterministic. + for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); + MI != ME; ++MI) { + unsigned Pointer = PositionMap[MI->getPointer()]; + bool Merged = false; + // Mark this pointer as seen. + Seen.insert(Pointer); + + // Go through all the existing sets and see if we can find one + // which can include this pointer. + for (CheckingPtrGroup &Group : Groups) { + // Don't perform more than a certain amount of comparisons. + // This should limit the cost of grouping the pointers to something + // reasonable. If we do end up hitting this threshold, the algorithm + // will create separate groups for all remaining pointers. + if (TotalComparisons > MemoryCheckMergeThreshold) + break; + + TotalComparisons++; + + if (Group.addPointer(Pointer)) { + Merged = true; + break; + } + } + + if (!Merged) + // We couldn't add this pointer to any existing set or the threshold + // for the number of comparisons has been reached. Create a new group + // to hold the current pointer. + Groups.push_back(CheckingPtrGroup(Pointer, *this)); + } + + // We've computed the grouped checks for this partition. + // Save the results and continue with the next one. + std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups)); + } +} + +bool RuntimePointerChecking::needsChecking( unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { + const PointerInfo &PointerI = Pointers[I]; + const PointerInfo &PointerJ = Pointers[J]; + // No need to check if two readonly pointers intersect. - if (!IsWritePtr[I] && !IsWritePtr[J]) + if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) return false; // Only need to check pointers between two different dependency sets. - if (DependencySetId[I] == DependencySetId[J]) + if (PointerI.DependencySetId == PointerJ.DependencySetId) return false; // Only need to check pointers in the same alias set. - if (AliasSetId[I] != AliasSetId[J]) + if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; // If PtrPartition is set omit checks between pointers of the same partition. 
@@ -153,45 +307,75 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking( return true; } -void LoopAccessInfo::RuntimePointerCheck::print( +void RuntimePointerChecking::print( raw_ostream &OS, unsigned Depth, const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); - if (NumPointers == 0) - return; OS.indent(Depth) << "Run-time memory checks:\n"; + unsigned N = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) { - OS.indent(Depth) << N++ << ":\n"; - OS.indent(Depth + 2) << *Pointers[I]; - if (PtrPartition) - OS << " (Partition: " << (*PtrPartition)[I] << ")"; - OS << "\n"; - OS.indent(Depth + 2) << *Pointers[J]; - if (PtrPartition) - OS << " (Partition: " << (*PtrPartition)[J] << ")"; - OS << "\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) + if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { + OS.indent(Depth) << "Check " << N++ << ":\n"; + OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; + + for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { + OS.indent(Depth + 2) + << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; + if (PtrPartition) + OS << " (Partition: " + << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" + << "\n"; + } + + OS.indent(Depth + 2) << "Against group " << J << ":\n"; + + for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { + OS.indent(Depth + 2) + << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; + if (PtrPartition) + OS << " (Partition: " + << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" + << "\n"; + } } + + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + OS.indent(Depth + 2) << "Group " << I << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low + << " High: " << *CheckingGroups[I].High << ")\n"; + for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " + << *Pointers[CheckingGroups[I].Members[J]].Expr + << "\n"; + } + } } -unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( +unsigned RuntimePointerChecking::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); + + unsigned NumPartitions = CheckingGroups.size(); unsigned CheckCount = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) + for (unsigned I = 0; I < NumPartitions; ++I) + for (unsigned J = I + 1; J < NumPartitions; ++J) + if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) CheckCount++; return CheckCount; } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +bool RuntimePointerChecking::needsAnyChecking( const SmallVectorImpl<int> *PtrPartition) const { - return getNumberOfChecks(PtrPartition) != 0; + unsigned NumPointers = Pointers.size(); + + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J, PtrPartition)) + return true; + return false; } namespace { @@ -207,7 +391,8 @@ public: AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {} + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), + IsRTCheckAnalysisNeeded(false) {} /// \brief 
Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -226,11 +411,12 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. Returns true when we have 0 pointers - /// (a check on 0 pointers for non-intersection will always return true). - bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, - const ValueToValueMap &Strides, + /// non-intersection. + /// + /// Returns true if we need no check or if we do and we can generate them + /// (i.e. the pointers have computable bounds). + bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, + Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -239,8 +425,11 @@ public: processMemAccesses(); } - bool isRTCheckNeeded() { return IsRTCheckNeeded; } - + /// \brief Initial processing of memory accesses determined that we need to + /// perform dependency checking. + /// + /// Note that this can later be cleared if we retry memcheck analysis without + /// dependency checking (i.e. ShouldRetryWithRuntimeCheck). bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } /// We decided that no dependence analysis would be used. Reset the state. @@ -255,7 +444,7 @@ private: typedef SetVector<MemAccessInfo> PtrAccessSet; /// \brief Go over all memory access and check whether runtime pointer checks - /// are needed /// and build sets of dependency check candidates. + /// are needed and build sets of dependency check candidates. void processMemAccesses(); /// Set of all accesses. @@ -280,7 +469,14 @@ private: /// dependence check. MemoryDepChecker::DepCandidates &DepCands; - bool IsRTCheckNeeded; + /// \brief Initial processing of memory accesses determined that we may need + /// to add memchecks. Perform the analysis to determine the necessary checks. + /// + /// Note that, this is different from isDependencyCheckNeeded. When we retry + /// memcheck analysis without dependency checking + /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared + /// while this remains set if we have potentially dependent accesses. + bool IsRTCheckAnalysisNeeded; }; } // end anonymous namespace @@ -296,16 +492,16 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, - ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, - bool ShouldCheckStride) { +bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, + ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &StridesMap, + bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; - NeedRTCheck = false; - if (!IsRTCheckNeeded) return true; + bool NeedRTCheck = false; + if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); @@ -313,6 +509,9 @@ bool AccessAnalysis::canCheckPtrAtRT( // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { + int NumReadPtrChecks = 0; + int NumWritePtrChecks = 0; + // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. 
unsigned RunningDepId = 1; @@ -323,6 +522,11 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); + if (IsWrite) + ++NumWritePtrChecks; + else + ++NumReadPtrChecks; + if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -341,7 +545,7 @@ bool AccessAnalysis::canCheckPtrAtRT( // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -350,15 +554,21 @@ bool AccessAnalysis::canCheckPtrAtRT( } } + // If we have at least two writes or one write and a read then we need to + // check them. But there is no need to checks if there is only one + // dependence set for this alias set. + // + // Note that this function computes CanDoRT and NeedRTCheck independently. + // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer + // for which we couldn't find the bounds but we don't actually need to emit + // any checks so it does not matter. + if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2)) + NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && + NumWritePtrChecks >= 1)); + ++ASId; } - // We need a runtime check if there are any accesses that need checking. - // However, some accesses cannot be checked (for example because we - // can't determine their bounds). In these cases we would need a check - // but wouldn't be able to add it. - NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); - // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -368,14 +578,15 @@ bool AccessAnalysis::canCheckPtrAtRT( for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i + 1; j < NumPointers; ++j) { // Only need to check pointers between two different dependency sets. - if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + if (RtCheck.Pointers[i].DependencySetId == + RtCheck.Pointers[j].DependencySetId) continue; // Only need to check pointers in the same alias set. - if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) + if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; - Value *PtrI = RtCheck.Pointers[i]; - Value *PtrJ = RtCheck.Pointers[j]; + Value *PtrI = RtCheck.Pointers[i].PointerValue; + Value *PtrJ = RtCheck.Pointers[j].PointerValue; unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); @@ -387,7 +598,18 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - return CanDoRT; + if (NeedRTCheck && CanDoRT) + RtCheck.groupChecks(DepCands, IsDepCheckNeeded); + + DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); + + RtCheck.Need = NeedRTCheck; + + bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT; + if (!CanDoRTIfNeeded) + RtCheck.reset(); + return CanDoRTIfNeeded; } void AccessAnalysis::processMemAccesses() { @@ -470,7 +692,7 @@ void AccessAnalysis::processMemAccesses() { // catch "a[i] = a[i] + " without having to do a dependence check). 
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { CheckDeps.insert(Access); - IsRTCheckNeeded = true; + IsRTCheckAnalysisNeeded = true; } if (IsWrite) @@ -600,7 +822,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, // Check the step is constant. const SCEV *Step = AR->getStepRecurrence(*SE); - // Calculate the pointer stride and check if it is consecutive. + // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) { DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << @@ -805,11 +1027,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"); - // Need consecutive accesses. We don't want to vectorize + // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ - DEBUG(dbgs() << "Non-consecutive pointer access\n"); + DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } @@ -859,8 +1081,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Stride = std::abs(StrideAPtr); if (Stride > 1 && - areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) { + DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; + } // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? @@ -1098,8 +1322,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { unsigned NumReads = 0; unsigned NumReadWrites = 0; - PtrRtCheck.Pointers.clear(); - PtrRtCheck.Need = false; + PtrRtChecking.Pointers.clear(); + PtrRtChecking.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); @@ -1258,28 +1482,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool NeedRTCheck; - bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, - NeedRTCheck, SE, - TheLoop, Strides); - - DEBUG(dbgs() << "LAA: We need to do " - << PtrRtCheck.getNumberOfChecks(nullptr) - << " pointer comparisons.\n"); - - // Check that we found the bounds for the pointer. - if (CanDoRT) - DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); - else if (NeedRTCheck) { + bool CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); - DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << - "the array bounds.\n"); - PtrRtCheck.reset(); + DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " + << "the array bounds.\n"); CanVecMem = false; return; } - PtrRtCheck.Need = NeedRTCheck; + DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { @@ -1290,23 +1503,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); - NeedRTCheck = true; // Clear the dependency checks. 
We assume they are not needed. Accesses.resetDepChecks(DepChecker); - PtrRtCheck.reset(); - PtrRtCheck.Need = true; + PtrRtChecking.reset(); + PtrRtChecking.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, - TheLoop, Strides, true); + CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); // Check that we found the bounds for the pointer. - if (NeedRTCheck && !CanDoRT) { + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); - PtrRtCheck.reset(); CanVecMem = false; return; } @@ -1317,8 +1528,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (CanVecMem) DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (NeedRTCheck ? "" : " don't") - << " need a runtime memory check.\n"); + << (PtrRtChecking.Need ? "" : " don't") + << " need runtime memory checks.\n"); else { emitAnalysis(LoopAccessReport() << "unsafe dependent memory operations in loop"); @@ -1357,35 +1568,38 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - if (!PtrRtCheck.Need) + if (!PtrRtChecking.Need) return std::make_pair(nullptr, nullptr); - unsigned NumPointers = PtrRtCheck.Pointers.size(); - SmallVector<TrackingVH<Value> , 2> Starts; - SmallVector<TrackingVH<Value> , 2> Ends; + SmallVector<TrackingVH<Value>, 2> Starts; + SmallVector<TrackingVH<Value>, 2> Ends; LLVMContext &Ctx = Loc->getContext(); SCEVExpander Exp(*SE, DL, "induction"); Instruction *FirstInst = nullptr; - for (unsigned i = 0; i < NumPointers; ++i) { - Value *Ptr = PtrRtCheck.Pointers[i]; + for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { + const RuntimePointerChecking::CheckingPtrGroup &CG = + PtrRtChecking.CheckingGroups[i]; + Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; const SCEV *Sc = SE->getSCEV(Ptr); if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << - *Ptr <<"\n"); + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); Starts.push_back(Ptr); Ends.push_back(Ptr); } else { - DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n'); unsigned AS = Ptr->getType()->getPointerAddressSpace(); // Use this type for pointer arithmetic. Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; - Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc); - Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc); + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); Starts.push_back(Start); Ends.push_back(End); } @@ -1394,9 +1608,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. 
   Value *MemoryRuntimeCheck = nullptr;
-  for (unsigned i = 0; i < NumPointers; ++i) {
-    for (unsigned j = i+1; j < NumPointers; ++j) {
-      if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+    for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
+      const RuntimePointerChecking::CheckingPtrGroup &CGI =
+          PtrRtChecking.CheckingGroups[i];
+      const RuntimePointerChecking::CheckingPtrGroup &CGJ =
+          PtrRtChecking.CheckingGroups[j];
+
+      if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
         continue;
 
       unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
@@ -1447,7 +1666,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
                                DominatorTree *DT, LoopInfo *LI,
                                const ValueToValueMap &Strides)
-    : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+    : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
       TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
       MaxSafeDepDistBytes(-1U), CanVecMem(false),
       StoreToLoopInvariantAddress(false) {
@@ -1457,7 +1676,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
   if (CanVecMem) {
-    if (PtrRtCheck.Need)
+    if (PtrRtChecking.Need)
       OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
     else
       OS.indent(Depth) << "Memory dependences are safe\n";
@@ -1476,7 +1695,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
     OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
 
   // List the pairs of accesses that need run-time checks to prove independence.
-  PtrRtCheck.print(OS, Depth);
+  PtrRtChecking.print(OS, Depth);
   OS << "\n";
 
   OS.indent(Depth) << "Store to invariant address was "
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 7617622b9ab6..322a9a80de4c 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -72,7 +72,6 @@ namespace {
     }
 
     void deleteValue(Value *V) override {}
-    void copyValue(Value *From, Value *To) override {}
     void addEscapingUse(Use &U) override {}
 
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 520d1e5ef87d..7d1c3fbef68a 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -28,12 +28,12 @@ namespace {
 ///
 /// This is used when no target specific information is available.
 struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
-  explicit NoTTIImpl(const DataLayout *DL)
+  explicit NoTTIImpl(const DataLayout &DL)
       : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
 };
 }
 
-TargetTransformInfo::TargetTransformInfo(const DataLayout *DL)
+TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
     : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
 
 TargetTransformInfo::~TargetTransformInfo() {}
@@ -304,7 +304,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
 char TargetIRAnalysis::PassID;
 
 TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
-  return Result(&F.getParent()->getDataLayout());
+  return Result(F.getParent()->getDataLayout());
 }
 
 // Register the basic pass.
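
A note on the TargetTransformInfo hunk above: taking the DataLayout by reference instead of by pointer makes the "no DataLayout" state unrepresentable at construction time, so the null checks a pointer would invite simply disappear. A minimal caller-side sketch; the helper name is hypothetical and not part of this commit, only the constructor shown above is assumed:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Module.h"

// Hypothetical helper: build the no-target-info TTI for a module.
// M.getDataLayout() returns a reference, so a valid layout is guaranteed,
// which is exactly what the pointer-to-reference API change enforces.
llvm::TargetTransformInfo makeDefaultTTI(llvm::Module &M) {
  return llvm::TargetTransformInfo(M.getDataLayout());
}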
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c45005f343d3..fa0d7798cae9 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1464,7 +1464,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
       // If the object is defined in the current Module, we'll be giving
       // it the preferred alignment. Otherwise, we have to assume that it
       // may only have the minimum ABI alignment.
-      if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+      if (GVar->isStrongDefinitionForLinker())
         Align = DL.getPreferredAlignment(GVar);
       else
         Align = DL.getABITypeAlignment(ObjectType);
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 96fddd103cc5..67f68dc8391e 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -11,7 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
 
 /// \brief Identify if the intrinsic is trivially vectorizable.
 /// This method returns true if the intrinsic's argument types are all
@@ -211,3 +217,195 @@ llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
 
   return Intrinsic::not_intrinsic;
 }
+
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
+  const DataLayout &DL = Gep->getModule()->getDataLayout();
+  unsigned LastOperand = Gep->getNumOperands() - 1;
+  unsigned GEPAllocSize = DL.getTypeAllocSize(
+      cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+  // Walk backwards and try to peel off zeros.
+  while (LastOperand > 1 &&
+         match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+    // Find the type we're currently indexing into.
+    gep_type_iterator GEPTI = gep_type_begin(Gep);
+    std::advance(GEPTI, LastOperand - 1);
+
+    // If it's a type with the same allocation size as the result of the GEP we
+    // can peel off the zero index.
+    if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+      break;
+    --LastOperand;
+  }
+
+  return LastOperand;
+}
+
+/// \brief If the argument is a GEP, then returns the operand identified by
+/// getGEPInductionOperand. However, if there is some other non-loop-invariant
+/// operand, it returns that instead.
+llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
+                                      Loop *Lp) {
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP)
+    return Ptr;
+
+  unsigned InductionOperand = getGEPInductionOperand(GEP);
+
+  // Check that all of the gep indices are uniform except for our induction
+  // operand.
+  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+    if (i != InductionOperand &&
+        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+      return Ptr;
+  return GEP->getOperand(InductionOperand);
+}
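
getGEPInductionOperand above matters when a GEP carries trailing zero indices that do not change the stride of the access. A hedged illustration; the IR and the helper are hypothetical, only getGEPInductionOperand itself comes from this commit. Given

  %pair = type { i32 }
  %gep  = getelementptr %pair, %pair* %P, i64 %i, i32 0

the trailing zero indexes into a type (%pair, 4 bytes) whose allocation size equals the GEP's result element (i32, 4 bytes), so it is peeled and operand 1, the induction index %i, is reported:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"

// Hypothetical helper: fetch the operand that actually varies with the
// induction variable, e.g. %i (operand index 1) in the GEP above.
static llvm::Value *inductionIndex(llvm::GetElementPtrInst *GEP) {
  return GEP->getOperand(llvm::getGEPInductionOperand(GEP));
}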
+/// \brief If a value has only one user that is a CastInst, return it.
+llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
+  llvm::Value *UniqueCast = nullptr;
+  for (User *U : Ptr->users()) {
+    CastInst *CI = dyn_cast<CastInst>(U);
+    if (CI && CI->getType() == Ty) {
+      if (!UniqueCast)
+        UniqueCast = CI;
+      else
+        return nullptr;
+    }
+  }
+  return UniqueCast;
+}
+
+/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
+/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
+llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
+                                        Loop *Lp) {
+  const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+  if (!PtrTy || PtrTy->isAggregateType())
+    return nullptr;
+
+  // Try to remove a gep instruction to make the pointer (actually the index
+  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
+  // analyzing the pointer; otherwise, we are analyzing the index.
+  llvm::Value *OrigPtr = Ptr;
+
+  // The size of the pointer access.
+  int64_t PtrAccessSize = 1;
+
+  Ptr = stripGetElementPtr(Ptr, SE, Lp);
+  const SCEV *V = SE->getSCEV(Ptr);
+
+  if (Ptr != OrigPtr)
+    // Strip off casts.
+    while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+      V = C->getOperand();
+
+  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+  if (!S)
+    return nullptr;
+
+  V = S->getStepRecurrence(*SE);
+  if (!V)
+    return nullptr;
+
+  // Strip off the size of access multiplication if we are still analyzing the
+  // pointer.
+  if (OrigPtr == Ptr) {
+    const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+    DL.getTypeAllocSize(PtrTy->getElementType());
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+      if (M->getOperand(0)->getSCEVType() != scConstant)
+        return nullptr;
+
+      const APInt &APStepVal =
+          cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+
+      // Huge step value - give up.
+      if (APStepVal.getBitWidth() > 64)
+        return nullptr;
+
+      int64_t StepVal = APStepVal.getSExtValue();
+      if (PtrAccessSize != StepVal)
+        return nullptr;
+      V = M->getOperand(1);
+    }
+  }
+
+  // Strip off casts.
+  Type *StripedOffRecurrenceCast = nullptr;
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+    StripedOffRecurrenceCast = C->getType();
+    V = C->getOperand();
+  }
+
+  // Look for the loop invariant symbolic value.
+  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+  if (!U)
+    return nullptr;
+
+  llvm::Value *Stride = U->getValue();
+  if (!Lp->isLoopInvariant(Stride))
+    return nullptr;
+
+  // If we have stripped off the recurrence cast we have to make sure that we
+  // return the value that is used in this loop so that we can replace it later.
+  if (StripedOffRecurrenceCast)
+    Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
+
+  return Stride;
+}
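
getStrideFromPointer is what feeds the symbolic-stride machinery in LoopAccessAnalysis: for an access like a[i * Stride] with loop-invariant Stride, the pointer's SCEV step is (ElementSize * %Stride), and the function hands back %Stride so a client can speculate Stride == 1 behind a runtime guard. A hedged caller sketch; the helper name is hypothetical, the signature is the one added above:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/VectorUtils.h"

// Hypothetical helper: true if Ptr strides by a loop-invariant symbol,
// i.e. the access is a candidate for stride versioning.
static bool hasSymbolicStride(llvm::Value *Ptr, llvm::ScalarEvolution *SE,
                              llvm::Loop *Lp) {
  return llvm::getStrideFromPointer(Ptr, SE, Lp) != nullptr;
}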
+
+/// \brief Given a vector and an element number, see if the scalar value is
+/// already around as a register, for example if it were inserted then
+/// extracted from the vector.
+llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+  VectorType *VTy = cast<VectorType>(V->getType());
+  unsigned Width = VTy->getNumElements();
+  if (EltNo >= Width)  // Out of range access.
+    return UndefValue::get(VTy->getElementType());
+
+  if (Constant *C = dyn_cast<Constant>(V))
+    return C->getAggregateElement(EltNo);
+
+  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+    // If this is an insert to a variable element, we don't know what it is.
+    if (!isa<ConstantInt>(III->getOperand(2)))
+      return nullptr;
+    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+    // If this is an insert to the element we are looking for, return the
+    // inserted value.
+    if (EltNo == IIElt)
+      return III->getOperand(1);
+
+    // Otherwise, the insertelement doesn't modify the value, recurse on its
+    // vector input.
+    return findScalarElement(III->getOperand(0), EltNo);
+  }
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+    int InEl = SVI->getMaskValue(EltNo);
+    if (InEl < 0)
+      return UndefValue::get(VTy->getElementType());
+    if (InEl < (int)LHSWidth)
+      return findScalarElement(SVI->getOperand(0), InEl);
+    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+  }
+
+  // Extract a value from a vector add operation with a constant zero.
+  Value *Val = nullptr; Constant *Con = nullptr;
+  if (match(V,
+            llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
+                                      llvm::PatternMatch::m_Constant(Con)))) {
+    if (Con->getAggregateElement(EltNo)->isNullValue())
+      return findScalarElement(Val, EltNo);
+  }
+
+  // Otherwise, we don't know.
+  return nullptr;
+}
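
To close, a hedged sketch of how findScalarElement is typically consumed; the builder helper is hypothetical, the semantics follow the function above. If the requested lane was produced by an insertelement, the inserted scalar is returned directly and no extractelement needs to be emitted:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"

// Hypothetical helper: prefer an existing scalar over extracting lane EltNo.
// For %v = insertelement <4 x i32> %a, i32 %x, i32 1, asking for lane 1
// yields %x; asking for lane 2 recurses into %a.
static llvm::Value *getLane(llvm::IRBuilder<> &B, llvm::Value *Vec,
                            unsigned EltNo) {
  if (llvm::Value *Scalar = llvm::findScalarElement(Vec, EltNo))
    return Scalar;  // already live as a scalar register
  return B.CreateExtractElement(Vec, B.getInt32(EltNo));
}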