author     Dimitry Andric <dim@FreeBSD.org>   2015-08-07 23:01:33 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2015-08-07 23:01:33 +0000
commit     ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch)
tree       52d1861acda1205241ee35a94aa63129c604d469 /lib/Analysis
parent     1a82d4c088707c791c792f6822f611b47a12bdfe (diff)
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp        |   5
-rw-r--r--  lib/Analysis/AliasDebugger.cpp        |   4
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp      |   3
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp   |   6
-rw-r--r--  lib/Analysis/ConstantFolding.cpp      |   6
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp    | 320
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp       |   6
-rw-r--r--  lib/Analysis/IVUsers.cpp              |  17
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp  | 114
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp   | 447
-rw-r--r--  lib/Analysis/NoAliasAnalysis.cpp      |   1
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp  |   6
-rw-r--r--  lib/Analysis/ValueTracking.cpp        |   2
-rw-r--r--  lib/Analysis/VectorUtils.cpp          | 198
14 files changed, 832 insertions, 303 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index ad0727a0e0e5..44d137dffd22 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -71,11 +71,6 @@ void AliasAnalysis::deleteValue(Value *V) {
AA->deleteValue(V);
}
-void AliasAnalysis::copyValue(Value *From, Value *To) {
- assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- AA->copyValue(From, To);
-}
-
void AliasAnalysis::addEscapingUse(Use &U) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
AA->addEscapingUse(U);
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 1ef49fc02fef..e5107b3bc827 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -124,10 +124,6 @@ namespace {
assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
AliasAnalysis::deleteValue(V);
}
- void copyValue(Value *From, Value *To) override {
- Vals.insert(To);
- AliasAnalysis::copyValue(From, To);
- }
};
}
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index bf8cda1ffaec..54d0f4304e1f 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -544,9 +544,6 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
// the tracker already knows about a value, it will ignore the request.
//
void AliasSetTracker::copyValue(Value *From, Value *To) {
- // Notify the alias analysis implementation that this value is copied.
- AA.copyValue(From, To);
-
// First, look up the PointerRec for this pointer.
PointerMapType::iterator I = PointerMap.find_as(From);
if (I == PointerMap.end())
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 8e812252fdfe..68f766edb301 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -685,6 +685,9 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
if (CS.onlyReadsMemory())
Min = OnlyReadsMemory;
+ if (CS.onlyAccessesArgMemory())
+ Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+
// The AliasAnalysis base class has some smarts, lets use them.
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
}
@@ -710,6 +713,9 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
+ if (F->onlyAccessesArgMemory())
+ Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
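The two BasicAliasAnalysis hunks above teach the pass about the new onlyAccessesArgMemory() predicate: the behavior is intersected with OnlyAccessesArgumentPointees in the same way the readonly attribute already intersects with OnlyReadsMemory. A minimal sketch of that bit-mask intersection, using an assumed, simplified enum layout rather than the real LLVM definitions:

    #include <cassert>

    // Assumed, simplified layout: one Ref/Mod bit each for argument-pointed-to
    // memory and for "anything else"; the real ModRefBehavior enum is organised
    // along the same lines.
    enum Behavior : unsigned {
      DoesNotAccessMemory          = 0,
      ArgRef = 1, ArgMod = 2, OtherRef = 4, OtherMod = 8,
      OnlyAccessesArgumentPointees = ArgRef | ArgMod,
      OnlyReadsMemory              = ArgRef | OtherRef,
      UnknownModRefBehavior        = ArgRef | ArgMod | OtherRef | OtherMod,
    };

    int main() {
      // A call site that is both readonly and argmemonly: intersecting the two
      // masks leaves only "reads memory pointed to by its arguments".
      unsigned Min = UnknownModRefBehavior;
      Min &= OnlyReadsMemory;               // from CS.onlyReadsMemory()
      Min &= OnlyAccessesArgumentPointees;  // from CS.onlyAccessesArgMemory()
      assert(Min == ArgRef);
      return 0;
    }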
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 2f4c6a92f9af..02a5aef03223 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1234,6 +1234,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
@@ -1450,6 +1452,10 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFoldFP(floor, V, Ty);
case Intrinsic::ceil:
return ConstantFoldFP(ceil, V, Ty);
+ case Intrinsic::sin:
+ return ConstantFoldFP(sin, V, Ty);
+ case Intrinsic::cos:
+ return ConstantFoldFP(cos, V, Ty);
}
if (!TLI)
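The ConstantFolding change whitelists llvm.sin and llvm.cos for folding and evaluates them exactly like floor/ceil/sqrt already are, by calling the host math library on the constant operand. A small sketch of the underlying idea, using a hypothetical foldUnaryFPCall helper rather than the real ConstantFoldFP machinery:

    #include <cmath>
    #include <optional>
    #include <string>

    // Hypothetical helper: evaluate a unary FP intrinsic on a constant operand
    // at compile time using the host libm and return the folded value.
    std::optional<double> foldUnaryFPCall(const std::string &Name, double V) {
      if (Name == "llvm.sin.f64")
        return std::sin(V);
      if (Name == "llvm.cos.f64")
        return std::cos(V);
      return std::nullopt;  // not on the whitelist; leave the call alone
    }

    // foldUnaryFPCall("llvm.cos.f64", 0.0) yields 1.0, so the call could be
    // replaced by a floating-point constant.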
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index f1ddde252924..18d45dd6a396 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -42,94 +42,111 @@ STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
namespace {
- /// FunctionRecord - One instance of this structure is stored for every
- /// function in the program. Later, the entries for these functions are
- /// removed if the function is found to call an external function (in which
- /// case we know nothing about it.
- struct FunctionRecord {
- /// GlobalInfo - Maintain mod/ref info for all of the globals without
- /// addresses taken that are read or written (transitively) by this
- /// function.
- std::map<const GlobalValue*, unsigned> GlobalInfo;
-
- /// MayReadAnyGlobal - May read global variables, but it is not known which.
- bool MayReadAnyGlobal;
-
- unsigned getInfoForGlobal(const GlobalValue *GV) const {
- unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
- std::map<const GlobalValue*, unsigned>::const_iterator I =
+/// FunctionRecord - One instance of this structure is stored for every
+/// function in the program. Later, the entries for these functions are
+/// removed if the function is found to call an external function (in which
+/// case we know nothing about it.
+struct FunctionRecord {
+ /// GlobalInfo - Maintain mod/ref info for all of the globals without
+ /// addresses taken that are read or written (transitively) by this
+ /// function.
+ std::map<const GlobalValue *, unsigned> GlobalInfo;
+
+ /// MayReadAnyGlobal - May read global variables, but it is not known which.
+ bool MayReadAnyGlobal;
+
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
+ unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+ std::map<const GlobalValue *, unsigned>::const_iterator I =
GlobalInfo.find(GV);
- if (I != GlobalInfo.end())
- Effect |= I->second;
- return Effect;
- }
+ if (I != GlobalInfo.end())
+ Effect |= I->second;
+ return Effect;
+ }
- /// FunctionEffect - Capture whether or not this function reads or writes to
- /// ANY memory. If not, we can do a lot of aggressive analysis on it.
- unsigned FunctionEffect;
+ /// FunctionEffect - Capture whether or not this function reads or writes to
+ /// ANY memory. If not, we can do a lot of aggressive analysis on it.
+ unsigned FunctionEffect;
- FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {}
- };
+ FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {}
+};
- /// GlobalsModRef - The actual analysis pass.
- class GlobalsModRef : public ModulePass, public AliasAnalysis {
- /// NonAddressTakenGlobals - The globals that do not have their addresses
- /// taken.
- std::set<const GlobalValue*> NonAddressTakenGlobals;
+/// GlobalsModRef - The actual analysis pass.
+class GlobalsModRef : public ModulePass, public AliasAnalysis {
+ /// NonAddressTakenGlobals - The globals that do not have their addresses
+ /// taken.
+ std::set<const GlobalValue *> NonAddressTakenGlobals;
- /// IndirectGlobals - The memory pointed to by this global is known to be
- /// 'owned' by the global.
- std::set<const GlobalValue*> IndirectGlobals;
+ /// IndirectGlobals - The memory pointed to by this global is known to be
+ /// 'owned' by the global.
+ std::set<const GlobalValue *> IndirectGlobals;
- /// AllocsForIndirectGlobals - If an instruction allocates memory for an
- /// indirect global, this map indicates which one.
- std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+ /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+ /// indirect global, this map indicates which one.
+ std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals;
- /// FunctionInfo - For each function, keep track of what globals are
- /// modified or read.
- std::map<const Function*, FunctionRecord> FunctionInfo;
+ /// FunctionInfo - For each function, keep track of what globals are
+ /// modified or read.
+ std::map<const Function *, FunctionRecord> FunctionInfo;
- public:
- static char ID;
- GlobalsModRef() : ModulePass(ID) {
- initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
- }
+public:
+ static char ID;
+ GlobalsModRef() : ModulePass(ID) {
+ initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this, &M.getDataLayout());
+ bool runOnModule(Module &M) override {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
- // Find non-addr taken globals.
- AnalyzeGlobals(M);
+ // Find non-addr taken globals.
+ AnalyzeGlobals(M);
- // Propagate on CG.
- AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
- return false;
- }
+ // Propagate on CG.
+ AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
+ return false;
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.addRequired<CallGraphWrapperPass>();
- AU.setPreservesAll(); // Does not transform code
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AliasAnalysis::getAnalysisUsage(AU);
+ AU.addRequired<CallGraphWrapperPass>();
+ AU.setPreservesAll(); // Does not transform code
+ }
+
+ //------------------------------------------------
+ // Implement the AliasAnalysis API
+ //
+ AliasResult alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const MemoryLocation &Loc) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) override {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS,
- const MemoryLocation &Loc) override;
- ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- return AliasAnalysis::getModRefInfo(CS1, CS2);
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(const Function *F) override {
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ if (FunctionRecord *FR = getFunctionInfo(F)) {
+ if (FR->FunctionEffect == 0)
+ Min = DoesNotAccessMemory;
+ else if ((FR->FunctionEffect & Mod) == 0)
+ Min = OnlyReadsMemory;
}
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(const Function *F) override {
- ModRefBehavior Min = UnknownModRefBehavior;
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+ }
+
+ /// getModRefBehavior - Return the behavior of the specified function if
+ /// called from the specified call site. The call site may be null in which
+ /// case the most generic behavior of this function should be returned.
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
+ ModRefBehavior Min = UnknownModRefBehavior;
+ if (const Function *F = CS.getCalledFunction())
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
Min = DoesNotAccessMemory;
@@ -137,68 +154,50 @@ namespace {
Min = OnlyReadsMemory;
}
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
- }
-
- /// getModRefBehavior - Return the behavior of the specified function if
- /// called from the specified call site. The call site may be null in which
- /// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
- ModRefBehavior Min = UnknownModRefBehavior;
-
- if (const Function* F = CS.getCalledFunction())
- if (FunctionRecord *FR = getFunctionInfo(F)) {
- if (FR->FunctionEffect == 0)
- Min = DoesNotAccessMemory;
- else if ((FR->FunctionEffect & Mod) == 0)
- Min = OnlyReadsMemory;
- }
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+ }
- return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
- }
+ void deleteValue(Value *V) override;
+ void addEscapingUse(Use &U) override;
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ void *getAdjustedAnalysisPointer(AnalysisID PI) override {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis *)this;
+ return this;
+ }
- void deleteValue(Value *V) override;
- void copyValue(Value *From, Value *To) override;
- void addEscapingUse(Use &U) override;
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- void *getAdjustedAnalysisPointer(AnalysisID PI) override {
- if (PI == &AliasAnalysis::ID)
- return (AliasAnalysis*)this;
- return this;
- }
-
- private:
- /// getFunctionInfo - Return the function info for the function, or null if
- /// we don't have anything useful to say about it.
- FunctionRecord *getFunctionInfo(const Function *F) {
- std::map<const Function*, FunctionRecord>::iterator I =
+private:
+ /// getFunctionInfo - Return the function info for the function, or null if
+ /// we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function *, FunctionRecord>::iterator I =
FunctionInfo.find(F);
- if (I != FunctionInfo.end())
- return &I->second;
- return nullptr;
- }
+ if (I != FunctionInfo.end())
+ return &I->second;
+ return nullptr;
+ }
- void AnalyzeGlobals(Module &M);
- void AnalyzeCallGraph(CallGraph &CG, Module &M);
- bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
- std::vector<Function*> &Writers,
- GlobalValue *OkayStoreDest = nullptr);
- bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
- };
+ void AnalyzeGlobals(Module &M);
+ void AnalyzeCallGraph(CallGraph &CG, Module &M);
+ bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers,
+ std::vector<Function *> &Writers,
+ GlobalValue *OkayStoreDest = nullptr);
+ bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
+};
}
char GlobalsModRef::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
- "globalsmodref-aa", "Simple mod/ref analysis for globals",
- false, true, false)
+INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
+ "Simple mod/ref analysis for globals", false, true,
+ false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
- "globalsmodref-aa", "Simple mod/ref analysis for globals",
- false, true, false)
+INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa",
+ "Simple mod/ref analysis for globals", false, true,
+ false)
Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
@@ -207,7 +206,7 @@ Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
/// (really, their address passed to something nontrivial), record this fact,
/// and record the functions that they are used directly in.
void GlobalsModRef::AnalyzeGlobals(Module &M) {
- std::vector<Function*> Readers, Writers;
+ std::vector<Function *> Readers, Writers;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasLocalLinkage()) {
if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
@@ -215,11 +214,12 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
NonAddressTakenGlobals.insert(I);
++NumNonAddrTakenFunctions;
}
- Readers.clear(); Writers.clear();
+ Readers.clear();
+ Writers.clear();
}
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+ ++I)
if (I->hasLocalLinkage()) {
if (!AnalyzeUsesOfPointer(I, Readers, Writers)) {
// Remember that we are tracking this global, and the mod/ref fns
@@ -228,7 +228,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
for (unsigned i = 0, e = Readers.size(); i != e; ++i)
FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref;
- if (!I->isConstant()) // No need to keep track of writers to constants
+ if (!I->isConstant()) // No need to keep track of writers to constants
for (unsigned i = 0, e = Writers.size(); i != e; ++i)
FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod;
++NumNonAddrTakenGlobalVars;
@@ -238,7 +238,8 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
AnalyzeIndirectGlobalMemory(I))
++NumIndirectGlobalVars;
}
- Readers.clear(); Writers.clear();
+ Readers.clear();
+ Writers.clear();
}
}
@@ -249,10 +250,11 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
///
/// If OkayStoreDest is non-null, stores into this global are allowed.
bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
- std::vector<Function*> &Readers,
- std::vector<Function*> &Writers,
+ std::vector<Function *> &Readers,
+ std::vector<Function *> &Writers,
GlobalValue *OkayStoreDest) {
- if (!V->getType()->isPointerTy()) return true;
+ if (!V->getType()->isPointerTy())
+ return true;
for (Use &U : V->uses()) {
User *I = U.getUser();
@@ -262,7 +264,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
if (V == SI->getOperand(1)) {
Writers.push_back(SI->getParent()->getParent());
} else if (SI->getOperand(1) != OkayStoreDest) {
- return true; // Storing the pointer
+ return true; // Storing the pointer
}
} else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
if (AnalyzeUsesOfPointer(I, Readers, Writers))
@@ -282,7 +284,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
}
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
- return true; // Allow comparison against null.
+ return true; // Allow comparison against null.
} else {
return true;
}
@@ -301,7 +303,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// Keep track of values related to the allocation of the memory, f.e. the
// value produced by the malloc call and any casts.
- std::vector<Value*> AllocRelatedValues;
+ std::vector<Value *> AllocRelatedValues;
// Walk the user list of the global. If we find anything other than a direct
// load or store, bail out.
@@ -310,13 +312,14 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// The pointer loaded from the global can only be used in simple ways:
// we allow addressing of it and loading storing to it. We do *not* allow
// storing the loaded pointer somewhere else or passing to a function.
- std::vector<Function*> ReadersWriters;
+ std::vector<Function *> ReadersWriters;
if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
- return false; // Loaded pointer escapes.
+ return false; // Loaded pointer escapes.
// TODO: Could try some IP mod/ref of the loaded pointer.
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Storing the global itself.
- if (SI->getOperand(0) == GV) return false;
+ if (SI->getOperand(0) == GV)
+ return false;
// If storing the null pointer, ignore it.
if (isa<ConstantPointerNull>(SI->getOperand(0)))
@@ -327,13 +330,13 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
GV->getParent()->getDataLayout());
if (!isAllocLikeFn(Ptr, TLI))
- return false; // Too hard to analyze.
+ return false; // Too hard to analyze.
// Analyze all uses of the allocation. If any of them are used in a
// non-simple way (e.g. stored to another global) bail out.
- std::vector<Function*> ReadersWriters;
+ std::vector<Function *> ReadersWriters;
if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
- return false; // Loaded pointer escapes.
+ return false; // Loaded pointer escapes.
// Remember that this allocation is related to the indirect global.
AllocRelatedValues.push_back(Ptr);
@@ -360,7 +363,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We do a bottom-up SCC traversal of the call graph. In other words, we
// visit all callees before callers (leaf-first).
- for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
const std::vector<CallGraphNode *> &SCC = *I;
assert(!SCC.empty() && "SCC with no functions?");
@@ -437,9 +440,10 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
// Scan the function bodies for explicit loads or stores.
- for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i)
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;
+ ++i)
for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
- E = inst_end(SCC[i]->getFunction());
+ E = inst_end(SCC[i]->getFunction());
II != E && FunctionEffect != ModRef; ++II)
if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) {
FunctionEffect |= Ref;
@@ -474,8 +478,6 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
}
-
-
/// alias - If one of the pointers is to a global that we are tracking, and the
/// other is some random pointer, we know there cannot be an alias, because the
/// address of the global isn't taken.
@@ -492,8 +494,10 @@ AliasResult GlobalsModRef::alias(const MemoryLocation &LocA,
if (GV1 || GV2) {
// If the global's address is taken, pretend we don't know it's a pointer to
// the global.
- if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr;
- if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr;
+ if (GV1 && !NonAddressTakenGlobals.count(GV1))
+ GV1 = nullptr;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2))
+ GV2 = nullptr;
// If the two pointers are derived from two different non-addr-taken
// globals, or if one is and the other isn't, we know these can't alias.
@@ -554,7 +558,6 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
}
-
//===----------------------------------------------------------------------===//
// Methods to update the analysis as a result of the client transformation.
//
@@ -565,9 +568,10 @@ void GlobalsModRef::deleteValue(Value *V) {
// any AllocRelatedValues for it.
if (IndirectGlobals.erase(GV)) {
// Remove any entries in AllocsForIndirectGlobals for this global.
- for (std::map<const Value*, const GlobalValue*>::iterator
- I = AllocsForIndirectGlobals.begin(),
- E = AllocsForIndirectGlobals.end(); I != E; ) {
+ for (std::map<const Value *, const GlobalValue *>::iterator
+ I = AllocsForIndirectGlobals.begin(),
+ E = AllocsForIndirectGlobals.end();
+ I != E;) {
if (I->second == GV) {
AllocsForIndirectGlobals.erase(I++);
} else {
@@ -585,16 +589,12 @@ void GlobalsModRef::deleteValue(Value *V) {
AliasAnalysis::deleteValue(V);
}
-void GlobalsModRef::copyValue(Value *From, Value *To) {
- AliasAnalysis::copyValue(From, To);
-}
-
void GlobalsModRef::addEscapingUse(Use &U) {
// For the purposes of this analysis, it is conservatively correct to treat
// a newly escaping value equivalently to a deleted one. We could perhaps
// be more precise by processing the new use and attempting to update our
// saved analysis results to accommodate it.
deleteValue(U);
-
+
AliasAnalysis::addEscapingUse(U);
}
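Most of the GlobalsModRef hunks are pure reindentation from hoisting the classes out of the nested namespace indentation, plus dropping the now-removed copyValue override. The mod/ref model itself is unchanged; a minimal sketch of how a FunctionRecord's FunctionEffect bits translate into a behavior, with assumed simplified types in place of the LLVM ones:

    #include <map>

    // Assumed, simplified mirror of the FunctionRecord bookkeeping.
    enum { Ref = 1, Mod = 2 };
    enum Behavior { DoesNotAccessMemory, OnlyReadsMemory, UnknownModRefBehavior };

    struct Record {
      std::map<const void *, unsigned> GlobalInfo;  // global -> Ref|Mod bits
      unsigned FunctionEffect = 0;                  // union over the whole body
    };

    // Same derivation getModRefBehavior() performs above: no recorded effect
    // means the function touches no memory; no Mod bit means it only reads.
    Behavior behaviorFor(const Record &R) {
      if (R.FunctionEffect == 0)
        return DoesNotAccessMemory;
      if ((R.FunctionEffect & Mod) == 0)
        return OnlyReadsMemory;
      return UnknownModRefBehavior;
    }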
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 349b9cac2c2d..c0d2e375cb04 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memmove:
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
- case Intrinsic::frameescape:
+ case Intrinsic::localescape:
HasFrameEscape = true;
return false;
}
@@ -1424,11 +1424,11 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
- // Disallow inlining functions that call @llvm.frameescape. Doing this
+ // Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
if (CS.getCalledFunction() &&
CS.getCalledFunction()->getIntrinsicID() ==
- llvm::Intrinsic::frameescape)
+ llvm::Intrinsic::localescape)
return false;
}
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index b88b2496b875..926787d3be91 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -12,8 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -34,6 +36,7 @@ using namespace llvm;
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
@@ -137,6 +140,11 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (Width > 64 || !DL.isLegalInteger(Width))
return false;
+ // Don't attempt to promote ephemeral values to indvars. They will be removed
+ // later anyway.
+ if (EphValues.count(I))
+ return false;
+
// Get the symbolic expression for this instruction.
const SCEV *ISE = SE->getSCEV(I);
@@ -244,6 +252,7 @@ IVUsers::IVUsers()
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
@@ -253,10 +262,16 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
L = l;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
+ // Collect ephemeral values so that AddUsersIfInteresting skips them.
+ EphValues.clear();
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
+
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
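The IVUsers change wires in AssumptionCacheTracker so that CodeMetrics::collectEphemeralValues can mark values whose only transitive purpose is feeding @llvm.assume; AddUsersImpl then refuses to treat them as interesting IV users, since they disappear once assumes are dropped. A rough sketch of the "ephemeral" notion on a toy use graph (assumed types, not the LLVM implementation):

    #include <vector>

    struct Node {
      bool IsAssume = false;            // stands in for a call to @llvm.assume
      std::vector<Node *> Users;
    };

    // A value is ephemeral if every transitive user eventually is an assume:
    // it carries no information for codegen and will be deleted later, so it
    // should not drive induction-variable rewriting.
    bool isEphemeral(const Node *N) {
      if (N->IsAssume)
        return true;
      if (N->Users.empty())
        return false;                   // a real (non-assume) root use
      for (const Node *U : N->Users)
        if (!isEphemeral(U))
          return false;
      return true;
    }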
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 12e406bb1a2d..fa42b48b6cdb 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -3046,7 +3047,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ FastMathFlags FMF, const Query &Q,
+ unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
@@ -3065,6 +3067,14 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return ConstantInt::get(GetCompareTy(LHS), 1);
+ // UNO/ORD predicates can be trivially folded if NaNs are ignored.
+ if (FMF.noNaNs()) {
+ if (Pred == FCmpInst::FCMP_UNO)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_ORD)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ }
+
// fcmp pred x, undef and fcmp pred undef, x
// fold to true if unordered, false if ordered
if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
@@ -3151,12 +3161,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout &DL,
+ FastMathFlags FMF, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF,
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
@@ -3511,6 +3521,82 @@ Value *llvm::SimplifyInsertValueInst(
RecursionLimit);
}
+/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const Query &, unsigned) {
+ if (auto *CAgg = dyn_cast<Constant>(Agg))
+ return ConstantFoldExtractValueInstruction(CAgg, Idxs);
+
+ // extractvalue x, (insertvalue y, elt, n), n -> elt
+ unsigned NumIdxs = Idxs.size();
+ for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr;
+ IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) {
+ ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices();
+ unsigned NumInsertValueIdxs = InsertValueIdxs.size();
+ unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs);
+ if (InsertValueIdxs.slice(0, NumCommonIdxs) ==
+ Idxs.slice(0, NumCommonIdxs)) {
+ if (NumIdxs == NumInsertValueIdxs)
+ return IVI->getInsertedValueOperand();
+ break;
+ }
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we
+/// can fold the result. If not, this returns null.
+static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
+ unsigned) {
+ if (auto *CVec = dyn_cast<Constant>(Vec)) {
+ if (auto *CIdx = dyn_cast<Constant>(Idx))
+ return ConstantFoldExtractElementInstruction(CVec, CIdx);
+
+ // The index is not relevant if our vector is a splat.
+ if (auto *Splat = CVec->getSplatValue())
+ return Splat;
+
+ if (isa<UndefValue>(Vec))
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+ }
+
+ // If extracting a specified index from the vector, see if we can recursively
+ // find a previously computed scalar that was inserted into the vector.
+ if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
+ unsigned IndexVal = IdxC->getZExtValue();
+ unsigned VectorWidth = Vec->getType()->getVectorNumElements();
+
+ // If this is extracting an invalid index, turn this into undef, to avoid
+ // crashing the code below.
+ if (IndexVal >= VectorWidth)
+ return UndefValue::get(Vec->getType()->getVectorElementType());
+
+ if (Value *Elt = findScalarElement(Vec, IndexVal))
+ return Elt;
+ }
+
+ return nullptr;
+}
+
+Value *llvm::SimplifyExtractElementInst(
+ Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
@@ -3670,7 +3756,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3900,9 +3986,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::FCmp:
- Result =
- SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0),
- I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
@@ -3920,6 +4006,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
IV->getIndices(), DL, TLI, DT, AC, I);
break;
}
+ case Instruction::ExtractValue: {
+ auto *EVI = cast<ExtractValueInst>(I);
+ Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
+ EVI->getIndices(), DL, TLI, DT, AC, I);
+ break;
+ }
+ case Instruction::ExtractElement: {
+ auto *EEI = cast<ExtractElementInst>(I);
+ Result = SimplifyExtractElementInst(
+ EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I);
+ break;
+ }
case Instruction::PHI:
Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
break;
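Besides threading FastMathFlags into SimplifyFCmpInst, the InstructionSimplify hunks add aggregate and vector folds: extractvalue looking through a matching insertvalue, and extractelement folding through splats, undef, out-of-range indices, or a previously inserted scalar found by findScalarElement. The new nnan-based fold is the easiest to state: under no-NaNs, ordered/unordered tests degenerate to constants. A small sketch of the semantics being relied on (plain C++, not LLVM API):

    #include <cmath>

    // "uno" is true iff either operand is NaN; "ord" is its negation.
    bool fcmp_uno(double A, double B) { return std::isnan(A) || std::isnan(B); }
    bool fcmp_ord(double A, double B) { return !fcmp_uno(A, B); }

    // If the compiler may assume neither operand is ever NaN (nnan fast-math
    // flag), fcmp_uno folds to false and fcmp_ord folds to true, which is
    // exactly what the FMF.noNaNs() branch above returns.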
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index b11cd7e84a6d..becbae4c5b50 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+/// \brief The maximum iterations used to merge memory checks
+static cl::opt<unsigned> MemoryCheckMergeThreshold(
+ "memory-check-merge-threshold", cl::Hidden,
+ cl::desc("Maximum number of comparisons done when trying to merge "
+ "runtime memory checks. (default = 100)"),
+ cl::init(100));
+
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
@@ -112,35 +119,182 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
return SE->getSCEV(Ptr);
}
-void LoopAccessInfo::RuntimePointerCheck::insert(
- ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- unsigned ASId, const ValueToValueMap &Strides) {
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
+ unsigned DepSetId, unsigned ASId,
+ const ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
- Pointers.push_back(Ptr);
- Starts.push_back(AR->getStart());
- Ends.push_back(ScEnd);
- IsWritePtr.push_back(WritePtr);
- DependencySetId.push_back(DepSetId);
- AliasSetId.push_back(ASId);
+ Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
+ Sc);
+}
+
+bool RuntimePointerChecking::needsChecking(
+ const CheckingPtrGroup &M, const CheckingPtrGroup &N,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
+ for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
+ if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+ return true;
+ return false;
+}
+
+/// Compare \p I and \p J and return the minimum.
+/// Return nullptr in case we couldn't find an answer.
+static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
+ ScalarEvolution *SE) {
+ const SCEV *Diff = SE->getMinusSCEV(J, I);
+ const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
+
+ if (!C)
+ return nullptr;
+ if (C->getValue()->isNegative())
+ return J;
+ return I;
+}
+
+bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
+ const SCEV *Start = RtCheck.Pointers[Index].Start;
+ const SCEV *End = RtCheck.Pointers[Index].End;
+
+ // Compare the starts and ends with the known minimum and maximum
+ // of this set. We need to know how we compare against the min/max
+ // of the set in order to be able to emit memchecks.
+ const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
+ if (!Min0)
+ return false;
+
+ const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
+ if (!Min1)
+ return false;
+
+ // Update the low bound expression if we've found a new min value.
+ if (Min0 == Start)
+ Low = Start;
+
+ // Update the high bound expression if we've found a new max value.
+ if (Min1 != End)
+ High = End;
+
+ Members.push_back(Index);
+ return true;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+void RuntimePointerChecking::groupChecks(
+ MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+ // We build the groups from dependency candidates equivalence classes
+ // because:
+ // - We know that pointers in the same equivalence class share
+ // the same underlying object and therefore there is a chance
+ // that we can compare pointers
+ // - We wouldn't be able to merge two pointers for which we need
+ // to emit a memcheck. The classes in DepCands are already
+ // conveniently built such that no two pointers in the same
+ // class need checking against each other.
+
+ // We use the following (greedy) algorithm to construct the groups
+ // For every pointer in the equivalence class:
+ // For each existing group:
+ // - if the difference between this pointer and the min/max bounds
+ // of the group is a constant, then make the pointer part of the
+ // group and update the min/max bounds of that group as required.
+
+ CheckingGroups.clear();
+
+ // If we don't have the dependency partitions, construct a new
+ // checking pointer group for each pointer.
+ if (!UseDependencies) {
+ for (unsigned I = 0; I < Pointers.size(); ++I)
+ CheckingGroups.push_back(CheckingPtrGroup(I, *this));
+ return;
+ }
+
+ unsigned TotalComparisons = 0;
+
+ DenseMap<Value *, unsigned> PositionMap;
+ for (unsigned Index = 0; Index < Pointers.size(); ++Index)
+ PositionMap[Pointers[Index].PointerValue] = Index;
+
+ // We need to keep track of what pointers we've already seen so we
+ // don't process them twice.
+ SmallSet<unsigned, 2> Seen;
+
+  // Go through all equivalence classes, get the "pointer check groups"
+ // and add them to the overall solution. We use the order in which accesses
+ // appear in 'Pointers' to enforce determinism.
+ for (unsigned I = 0; I < Pointers.size(); ++I) {
+ // We've seen this pointer before, and therefore already processed
+ // its equivalence class.
+ if (Seen.count(I))
+ continue;
+
+ MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
+ Pointers[I].IsWritePtr);
+
+ SmallVector<CheckingPtrGroup, 2> Groups;
+ auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
+
+ // Because DepCands is constructed by visiting accesses in the order in
+ // which they appear in alias sets (which is deterministic) and the
+ // iteration order within an equivalence class member is only dependent on
+ // the order in which unions and insertions are performed on the
+ // equivalence class, the iteration order is deterministic.
+ for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
+ MI != ME; ++MI) {
+ unsigned Pointer = PositionMap[MI->getPointer()];
+ bool Merged = false;
+ // Mark this pointer as seen.
+ Seen.insert(Pointer);
+
+ // Go through all the existing sets and see if we can find one
+ // which can include this pointer.
+ for (CheckingPtrGroup &Group : Groups) {
+ // Don't perform more than a certain amount of comparisons.
+ // This should limit the cost of grouping the pointers to something
+ // reasonable. If we do end up hitting this threshold, the algorithm
+ // will create separate groups for all remaining pointers.
+ if (TotalComparisons > MemoryCheckMergeThreshold)
+ break;
+
+ TotalComparisons++;
+
+ if (Group.addPointer(Pointer)) {
+ Merged = true;
+ break;
+ }
+ }
+
+ if (!Merged)
+ // We couldn't add this pointer to any existing set or the threshold
+ // for the number of comparisons has been reached. Create a new group
+ // to hold the current pointer.
+ Groups.push_back(CheckingPtrGroup(Pointer, *this));
+ }
+
+ // We've computed the grouped checks for this partition.
+ // Save the results and continue with the next one.
+ std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ }
+}
+
+bool RuntimePointerChecking::needsChecking(
unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+ const PointerInfo &PointerI = Pointers[I];
+ const PointerInfo &PointerJ = Pointers[J];
+
// No need to check if two readonly pointers intersect.
- if (!IsWritePtr[I] && !IsWritePtr[J])
+ if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
return false;
// Only need to check pointers between two different dependency sets.
- if (DependencySetId[I] == DependencySetId[J])
+ if (PointerI.DependencySetId == PointerJ.DependencySetId)
return false;
// Only need to check pointers in the same alias set.
- if (AliasSetId[I] != AliasSetId[J])
+ if (PointerI.AliasSetId != PointerJ.AliasSetId)
return false;
// If PtrPartition is set omit checks between pointers of the same partition.
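The greedy grouping described in the groupChecks() comments above merges a pointer into an existing group when its start/end SCEVs are comparable with the group's current Low/High bounds (their difference folds to a constant, per getMinFromExprs), widening the bounds so one (Low, High) range check covers several pointers; otherwise the pointer starts a group of its own. A minimal sketch of CheckingPtrGroup::addPointer under the assumed simplification that expressions are "symbolic base + constant offset" instead of SCEVs:

    #include <string>
    #include <vector>

    // Simplified stand-in for a SCEV bound: comparable only with expressions
    // that share the same symbolic base.
    struct Expr { std::string Base; long Offset; };
    struct Group { Expr Low, High; std::vector<unsigned> Members; };

    // Mirror of CheckingPtrGroup::addPointer: refuse the merge if the bounds
    // are not comparable; otherwise widen Low/High and record the member.
    bool addPointer(Group &G, unsigned Index, const Expr &Start, const Expr &End) {
      if (Start.Base != G.Low.Base || End.Base != G.High.Base)
        return false;                  // caller tries the next group, or starts one
      if (Start.Offset < G.Low.Offset)
        G.Low = Start;                 // found a new minimum start
      if (End.Offset > G.High.Offset)
        G.High = End;                  // found a new maximum end
      G.Members.push_back(Index);
      return true;
    }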
@@ -153,45 +307,75 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
return true;
}
-void LoopAccessInfo::RuntimePointerCheck::print(
+void RuntimePointerChecking::print(
raw_ostream &OS, unsigned Depth,
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
- if (NumPointers == 0)
- return;
OS.indent(Depth) << "Run-time memory checks:\n";
+
unsigned N = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition)) {
- OS.indent(Depth) << N++ << ":\n";
- OS.indent(Depth + 2) << *Pointers[I];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[I] << ")";
- OS << "\n";
- OS.indent(Depth + 2) << *Pointers[J];
- if (PtrPartition)
- OS << " (Partition: " << (*PtrPartition)[J] << ")";
- OS << "\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I)
+ for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
+ OS.indent(Depth) << "Check " << N++ << ":\n";
+ OS.indent(Depth + 2) << "Comparing group " << I << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
+ OS.indent(Depth + 2)
+ << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
+ << "\n";
+ }
+
+ OS.indent(Depth + 2) << "Against group " << J << ":\n";
+
+ for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
+ OS.indent(Depth + 2)
+ << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
+ if (PtrPartition)
+ OS << " (Partition: "
+ << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
+ << "\n";
+ }
}
+
+ OS.indent(Depth) << "Grouped accesses:\n";
+ for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+ OS.indent(Depth + 2) << "Group " << I << ":\n";
+ OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
+ << " High: " << *CheckingGroups[I].High << ")\n";
+ for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
+ OS.indent(Depth + 6) << "Member: "
+ << *Pointers[CheckingGroups[I].Members[J]].Expr
+ << "\n";
+ }
+ }
}
-unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks(
+unsigned RuntimePointerChecking::getNumberOfChecks(
const SmallVectorImpl<int> *PtrPartition) const {
- unsigned NumPointers = Pointers.size();
+
+ unsigned NumPartitions = CheckingGroups.size();
unsigned CheckCount = 0;
- for (unsigned I = 0; I < NumPointers; ++I)
- for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J, PtrPartition))
+ for (unsigned I = 0; I < NumPartitions; ++I)
+ for (unsigned J = I + 1; J < NumPartitions; ++J)
+ if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
CheckCount++;
return CheckCount;
}
-bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+bool RuntimePointerChecking::needsAnyChecking(
const SmallVectorImpl<int> *PtrPartition) const {
- return getNumberOfChecks(PtrPartition) != 0;
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
}
namespace {
@@ -207,7 +391,8 @@ public:
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA)
- : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
+ IsRTCheckAnalysisNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -226,11 +411,12 @@ public:
}
/// \brief Check whether we can check the pointers at runtime for
- /// non-intersection. Returns true when we have 0 pointers
- /// (a check on 0 pointers for non-intersection will always return true).
- bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
- bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop,
- const ValueToValueMap &Strides,
+ /// non-intersection.
+ ///
+ /// Returns true if we need no check or if we do and we can generate them
+ /// (i.e. the pointers have computable bounds).
+ bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
bool ShouldCheckStride = false);
/// \brief Goes over all memory accesses, checks whether a RT check is needed
@@ -239,8 +425,11 @@ public:
processMemAccesses();
}
- bool isRTCheckNeeded() { return IsRTCheckNeeded; }
-
+ /// \brief Initial processing of memory accesses determined that we need to
+ /// perform dependency checking.
+ ///
+ /// Note that this can later be cleared if we retry memcheck analysis without
+ /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -255,7 +444,7 @@ private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
/// \brief Go over all memory access and check whether runtime pointer checks
- /// are needed /// and build sets of dependency check candidates.
+ /// are needed and build sets of dependency check candidates.
void processMemAccesses();
/// Set of all accesses.
@@ -280,7 +469,14 @@ private:
/// dependence check.
MemoryDepChecker::DepCandidates &DepCands;
- bool IsRTCheckNeeded;
+ /// \brief Initial processing of memory accesses determined that we may need
+ /// to add memchecks. Perform the analysis to determine the necessary checks.
+ ///
+ /// Note that, this is different from isDependencyCheckNeeded. When we retry
+ /// memcheck analysis without dependency checking
+ /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
+ /// while this remains set if we have potentially dependent accesses.
+ bool IsRTCheckAnalysisNeeded;
};
} // end anonymous namespace
@@ -296,16 +492,16 @@ static bool hasComputableBounds(ScalarEvolution *SE,
return AR->isAffine();
}
-bool AccessAnalysis::canCheckPtrAtRT(
- LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck,
- ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
- bool ShouldCheckStride) {
+bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
+ ScalarEvolution *SE, Loop *TheLoop,
+ const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
- NeedRTCheck = false;
- if (!IsRTCheckNeeded) return true;
+ bool NeedRTCheck = false;
+ if (!IsRTCheckAnalysisNeeded) return true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
@@ -313,6 +509,9 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Accesses between different groups doesn't need to be checked.
unsigned ASId = 1;
for (auto &AS : AST) {
+ int NumReadPtrChecks = 0;
+ int NumWritePtrChecks = 0;
+
// We assign consecutive id to access from different dependence sets.
// Accesses within the same set don't need a runtime check.
unsigned RunningDepId = 1;
@@ -323,6 +522,11 @@ bool AccessAnalysis::canCheckPtrAtRT(
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
MemAccessInfo Access(Ptr, IsWrite);
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
if (hasComputableBounds(SE, StridesMap, Ptr) &&
// When we run after a failing dependency check we have to make sure
// we don't have wrapping pointers.
@@ -341,7 +545,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
// Each access has its own dependence set.
DepId = RunningDepId++;
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+ RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
} else {
@@ -350,15 +554,21 @@ bool AccessAnalysis::canCheckPtrAtRT(
}
}
+ // If we have at least two writes or one write and a read then we need to
+  // check them. But there is no need to check if there is only one
+ // dependence set for this alias set.
+ //
+ // Note that this function computes CanDoRT and NeedRTCheck independently.
+ // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
+ // for which we couldn't find the bounds but we don't actually need to emit
+ // any checks so it does not matter.
+ if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
+ NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
+ NumWritePtrChecks >= 1));
+
++ASId;
}
- // We need a runtime check if there are any accesses that need checking.
- // However, some accesses cannot be checked (for example because we
- // can't determine their bounds). In these cases we would need a check
- // but wouldn't be able to add it.
- NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr);
-
// If the pointers that we would use for the bounds comparison have different
// address spaces, assume the values aren't directly comparable, so we can't
// use them for the runtime check. We also have to assume they could
@@ -368,14 +578,15 @@ bool AccessAnalysis::canCheckPtrAtRT(
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i + 1; j < NumPointers; ++j) {
// Only need to check pointers between two different dependency sets.
- if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ if (RtCheck.Pointers[i].DependencySetId ==
+ RtCheck.Pointers[j].DependencySetId)
continue;
// Only need to check pointers in the same alias set.
- if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
continue;
- Value *PtrI = RtCheck.Pointers[i];
- Value *PtrJ = RtCheck.Pointers[j];
+ Value *PtrI = RtCheck.Pointers[i].PointerValue;
+ Value *PtrJ = RtCheck.Pointers[j].PointerValue;
unsigned ASi = PtrI->getType()->getPointerAddressSpace();
unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
@@ -387,7 +598,18 @@ bool AccessAnalysis::canCheckPtrAtRT(
}
}
- return CanDoRT;
+ if (NeedRTCheck && CanDoRT)
+ RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+
+ DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+ << " pointer comparisons.\n");
+
+ RtCheck.Need = NeedRTCheck;
+
+ bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
+ if (!CanDoRTIfNeeded)
+ RtCheck.reset();
+ return CanDoRTIfNeeded;
}
void AccessAnalysis::processMemAccesses() {
@@ -470,7 +692,7 @@ void AccessAnalysis::processMemAccesses() {
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
CheckDeps.insert(Access);
- IsRTCheckNeeded = true;
+ IsRTCheckAnalysisNeeded = true;
}
if (IsWrite)
@@ -600,7 +822,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
// Check the step is constant.
const SCEV *Step = AR->getStepRecurrence(*SE);
- // Calculate the pointer stride and check if it is consecutive.
+ // Calculate the pointer stride and check if it is constant.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C) {
DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
@@ -805,11 +1027,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
<< *InstMap[BIdx] << ": " << *Dist << "\n");
- // Need consecutive accesses. We don't want to vectorize
+ // Need accesses with constant stride. We don't want to vectorize
// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
// the address space.
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
- DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return Dependence::Unknown;
}
@@ -859,8 +1081,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
unsigned Stride = std::abs(StrideAPtr);
if (Stride > 1 &&
- areStridedAccessesIndependent(Distance, Stride, TypeByteSize))
+ areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) {
+ DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
return Dependence::NoDep;
+ }
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -1098,8 +1322,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
unsigned NumReads = 0;
unsigned NumReadWrites = 0;
- PtrRtCheck.Pointers.clear();
- PtrRtCheck.Need = false;
+ PtrRtChecking.Pointers.clear();
+ PtrRtChecking.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
@@ -1258,28 +1482,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- bool NeedRTCheck;
- bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck,
- NeedRTCheck, SE,
- TheLoop, Strides);
-
- DEBUG(dbgs() << "LAA: We need to do "
- << PtrRtCheck.getNumberOfChecks(nullptr)
- << " pointer comparisons.\n");
-
- // Check that we found the bounds for the pointer.
- if (CanDoRT)
- DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
- else if (NeedRTCheck) {
+ bool CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
- DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
- "the array bounds.\n");
- PtrRtCheck.reset();
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
+ << "the array bounds.\n");
CanVecMem = false;
return;
}
- PtrRtCheck.Need = NeedRTCheck;
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
CanVecMem = true;
if (Accesses.isDependencyCheckNeeded()) {
@@ -1290,23 +1503,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
- NeedRTCheck = true;
// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks(DepChecker);
- PtrRtCheck.reset();
- PtrRtCheck.Need = true;
+ PtrRtChecking.reset();
+ PtrRtChecking.Need = true;
- CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE,
- TheLoop, Strides, true);
+ CanDoRTIfNeeded =
+ Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);
// Check that we found the bounds for the pointer.
- if (NeedRTCheck && !CanDoRT) {
+ if (!CanDoRTIfNeeded) {
emitAnalysis(LoopAccessReport()
<< "cannot check memory dependencies at runtime");
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
- PtrRtCheck.reset();
CanVecMem = false;
return;
}
@@ -1317,8 +1528,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (CanVecMem)
DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
- << (NeedRTCheck ? "" : " don't")
- << " need a runtime memory check.\n");
+ << (PtrRtChecking.Need ? "" : " don't")
+ << " need runtime memory checks.\n");
else {
emitAnalysis(LoopAccessReport() <<
"unsafe dependent memory operations in loop");
@@ -1357,35 +1568,38 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
- if (!PtrRtCheck.Need)
+ if (!PtrRtChecking.Need)
return std::make_pair(nullptr, nullptr);
- unsigned NumPointers = PtrRtCheck.Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
+ SmallVector<TrackingVH<Value>, 2> Starts;
+ SmallVector<TrackingVH<Value>, 2> Ends;
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, DL, "induction");
Instruction *FirstInst = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck.Pointers[i];
+ for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+ const RuntimePointerChecking::CheckingPtrGroup &CG =
+ PtrRtChecking.CheckingGroups[i];
+ Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
const SCEV *Sc = SE->getSCEV(Ptr);
if (SE->isLoopInvariant(Sc, TheLoop)) {
- DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+ << "\n");
Starts.push_back(Ptr);
Ends.push_back(Ptr);
} else {
- DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
unsigned AS = Ptr->getType()->getPointerAddressSpace();
// Use this type for pointer arithmetic.
Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+ Value *Start = nullptr, *End = nullptr;
- Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+ Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
+ End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
+ DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
Starts.push_back(Start);
Ends.push_back(End);
}
@@ -1394,9 +1608,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
+ for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
+ const RuntimePointerChecking::CheckingPtrGroup &CGI =
+ PtrRtChecking.CheckingGroups[i];
+ const RuntimePointerChecking::CheckingPtrGroup &CGJ =
+ PtrRtChecking.CheckingGroups[j];
+
+ if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
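The loop above now walks pairs of checking groups instead of pairs of raw pointers, so pointers merged into one group share a single expanded [Low, High) range and the quadratic number of emitted comparisons shrinks accordingly. A back-of-the-envelope illustration (hypothetical helper, not part of RuntimePointerChecking):

// With N ungrouped pointers the worst case is N*(N-1)/2 overlap tests; with G
// groups it is G*(G-1)/2. E.g. A[i], A[i+1], A[i+2] and B[i] give 6 pairwise
// tests ungrouped, but only 1 once the three A-based pointers form one group.
static unsigned maxPairwiseChecks(unsigned NumGroups) {
  return NumGroups * (NumGroups - 1) / 2;
}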
@@ -1447,7 +1666,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
const ValueToValueMap &Strides)
- : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
+ : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
@@ -1457,7 +1676,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (CanVecMem) {
- if (PtrRtCheck.Need)
+ if (PtrRtChecking.Need)
OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
else
OS.indent(Depth) << "Memory dependences are safe\n";
@@ -1476,7 +1695,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
  // List the pairs of accesses that need run-time checks to prove independence.
- PtrRtCheck.print(OS, Depth);
+ PtrRtChecking.print(OS, Depth);
OS << "\n";
OS.indent(Depth) << "Store to invariant address was "
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 7617622b9ab6..322a9a80de4c 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -72,7 +72,6 @@ namespace {
}
void deleteValue(Value *V) override {}
- void copyValue(Value *From, Value *To) override {}
void addEscapingUse(Use &U) override {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 520d1e5ef87d..7d1c3fbef68a 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -28,12 +28,12 @@ namespace {
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
- explicit NoTTIImpl(const DataLayout *DL)
+ explicit NoTTIImpl(const DataLayout &DL)
: TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}
-TargetTransformInfo::TargetTransformInfo(const DataLayout *DL)
+TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
: TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
TargetTransformInfo::~TargetTransformInfo() {}
@@ -304,7 +304,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
char TargetIRAnalysis::PassID;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
- return Result(&F.getParent()->getDataLayout());
+ return Result(F.getParent()->getDataLayout());
}
// Register the basic pass.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c45005f343d3..fa0d7798cae9 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1464,7 +1464,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// If the object is defined in the current Module, we'll be giving
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
- if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ if (GVar->isStrongDefinitionForLinker())
Align = DL.getPreferredAlignment(GVar);
else
Align = DL.getABITypeAlignment(ObjectType);
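The rewritten condition spells out the intent of the old test: only a global whose definition is guaranteed to prevail at link time may be assumed to carry the module's preferred alignment; a weak definition or a plain declaration can be replaced by a definition elsewhere that only honors the ABI alignment. Illustrative C++, not taken from the patch:

// In this translation unit the linker may substitute another definition of g,
// so computeKnownBits can rely only on the ABI alignment of int for &g.
__attribute__((weak)) int g;   // weak: not a strong definition for the linker
extern int h;                  // declaration only: definition lives elsewhere
int *weak_ptr = &g;
int *decl_ptr = &h;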
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 96fddd103cc5..67f68dc8391e 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -11,7 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
/// \brief Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all
@@ -211,3 +217,195 @@ llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
return Intrinsic::not_intrinsic;
}
+
+/// \brief Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
+ cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 &&
+ match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 1);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
+
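As an illustration of the peeling loop above: a store through a struct field typically lowers to a GEP with a trailing constant-zero index, and because that index selects a field whose allocation size equals the GEP's result element size, it can be ignored when looking for the induction operand (example source and lowering are illustrative, not from this patch):

struct S { int x; };                 // sizeof(S) == sizeof(int), no padding
void zeroFields(S *a, int n) {
  for (int i = 0; i < n; ++i)
    a[i].x = 0;                      // getelementptr %struct.S, %struct.S* %a, i64 %i, i32 0
}
// getGEPInductionOperand() would peel the trailing 0 and report operand 1
// (the one holding %i), so stride analysis sees a plain unit-stride access.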
+/// \brief If the argument is a GEP, return the operand identified by
+/// getGEPInductionOperand. If any other operand is not loop-invariant,
+/// return the original pointer instead.
+llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
+ Loop *Lp) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return Ptr;
+
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+ return Ptr;
+ return GEP->getOperand(InductionOperand);
+}
+
+/// \brief Return \p Ptr's unique CastInst user of type \p Ty, if any.
+llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
+ llvm::Value *UniqueCast = nullptr;
+ for (User *U : Ptr->users()) {
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (CI && CI->getType() == Ty) {
+ if (!UniqueCast)
+ UniqueCast = CI;
+ else
+ return nullptr;
+ }
+ }
+ return UniqueCast;
+}
+
+/// \brief Get the stride of a pointer access in a loop. Looks for symbolic
+/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
+llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
+ Loop *Lp) {
+ const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy || PtrTy->isAggregateType())
+ return nullptr;
+
+  // Try to remove a gep instruction to make the pointer (actually the index
+  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
+  // analyzing the pointer, otherwise we are analyzing the index.
+ llvm::Value *OrigPtr = Ptr;
+
+ // The size of the pointer access.
+ int64_t PtrAccessSize = 1;
+
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
+ const SCEV *V = SE->getSCEV(Ptr);
+
+ if (Ptr != OrigPtr)
+ // Strip off casts.
+ while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
+ V = C->getOperand();
+
+ const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+ if (!S)
+ return nullptr;
+
+ V = S->getStepRecurrence(*SE);
+ if (!V)
+ return nullptr;
+
+ // Strip off the size of access multiplication if we are still analyzing the
+ // pointer.
+ if (OrigPtr == Ptr) {
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+    PtrAccessSize = DL.getTypeAllocSize(PtrTy->getElementType());
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+ if (M->getOperand(0)->getSCEVType() != scConstant)
+ return nullptr;
+
+ const APInt &APStepVal =
+ cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return nullptr;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+ if (PtrAccessSize != StepVal)
+ return nullptr;
+ V = M->getOperand(1);
+ }
+ }
+
+ // Strip off casts.
+ Type *StripedOffRecurrenceCast = nullptr;
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
+ StripedOffRecurrenceCast = C->getType();
+ V = C->getOperand();
+ }
+
+ // Look for the loop invariant symbolic value.
+ const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+ if (!U)
+ return nullptr;
+
+ llvm::Value *Stride = U->getValue();
+ if (!Lp->isLoopInvariant(Stride))
+ return nullptr;
+
+ // If we have stripped off the recurrence cast we have to make sure that we
+ // return the value that is used in this loop so that we can replace it later.
+ if (StripedOffRecurrenceCast)
+ Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
+
+ return Stride;
+}
+
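The pattern this helper recovers corresponds to source loops whose stride is a loop-invariant variable rather than a literal constant; the vectorizer can then version the loop on a runtime check that the symbolic stride is one. A sketch of the kind of loop involved (illustrative only):

// The address of a[i * stride] is an add recurrence whose step is
// stride * sizeof(int); getStrideFromPointer() strips the size factor and
// returns the IR value for 'stride', which LAA records as a symbolic stride.
void scale(int *a, long stride, long n) {
  for (long i = 0; i < n; ++i)
    a[i * stride] += 1;
}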
+/// \brief Given a vector and an element number, see if the scalar value is
+/// already around as a register, for example if it were inserted then extracted
+/// from the vector.
+llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
+ VectorType *VTy = cast<VectorType>(V->getType());
+ unsigned Width = VTy->getNumElements();
+ if (EltNo >= Width) // Out of range access.
+ return UndefValue::get(VTy->getElementType());
+
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->getAggregateElement(EltNo);
+
+ if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
+ // If this is an insert to a variable element, we don't know what it is.
+ if (!isa<ConstantInt>(III->getOperand(2)))
+ return nullptr;
+ unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
+
+ // If this is an insert to the element we are looking for, return the
+ // inserted value.
+ if (EltNo == IIElt)
+ return III->getOperand(1);
+
+ // Otherwise, the insertelement doesn't modify the value, recurse on its
+ // vector input.
+ return findScalarElement(III->getOperand(0), EltNo);
+ }
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
+ unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ int InEl = SVI->getMaskValue(EltNo);
+ if (InEl < 0)
+ return UndefValue::get(VTy->getElementType());
+ if (InEl < (int)LHSWidth)
+ return findScalarElement(SVI->getOperand(0), InEl);
+ return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
+ }
+
+ // Extract a value from a vector add operation with a constant zero.
+ Value *Val = nullptr; Constant *Con = nullptr;
+ if (match(V,
+ llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
+ llvm::PatternMatch::m_Constant(Con)))) {
+ if (Con->getAggregateElement(EltNo)->isNullValue())
+ return findScalarElement(Val, EltNo);
+ }
+
+ // Otherwise, we don't know.
+ return nullptr;
+}
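A possible caller's-eye view of findScalarElement, assuming the declaration lives in llvm/Analysis/VectorUtils.h as in this patch; a sketch only, not code from the commit:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// If the extracted lane is already available as a scalar (e.g. it was just
// inserted), reuse that value and drop the extractelement.
static bool simplifyExtract(ExtractElementInst *EEI) {
  auto *Idx = dyn_cast<ConstantInt>(EEI->getIndexOperand());
  if (!Idx)
    return false;
  if (Value *Scalar = findScalarElement(EEI->getVectorOperand(),
                                        Idx->getZExtValue())) {
    EEI->replaceAllUsesWith(Scalar);
    EEI->eraseFromParent();
    return true;
  }
  return false;
}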