Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 12
-rw-r--r--  lib/Analysis/Analysis.cpp | 1
-rw-r--r--  lib/Analysis/AssumptionCache.cpp | 12
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 42
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 19
-rw-r--r--  lib/Analysis/CFG.cpp | 11
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 2
-rw-r--r--  lib/Analysis/CFLAndersAliasAnalysis.cpp | 19
-rw-r--r--  lib/Analysis/CFLSteensAliasAnalysis.cpp | 20
-rw-r--r--  lib/Analysis/CallGraph.cpp | 4
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 46
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 405
-rw-r--r--  lib/Analysis/DDG.cpp | 203
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 8
-rw-r--r--  lib/Analysis/DependenceGraphBuilder.cpp | 228
-rw-r--r--  lib/Analysis/DivergenceAnalysis.cpp | 10
-rw-r--r--  lib/Analysis/GlobalsModRef.cpp | 37
-rw-r--r--  lib/Analysis/IVDescriptors.cpp | 3
-rw-r--r--  lib/Analysis/IndirectCallPromotionAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/InlineCost.cpp | 23
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 320
-rw-r--r--  lib/Analysis/LazyBranchProbabilityInfo.cpp | 5
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 13
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 37
-rw-r--r--  lib/Analysis/LegacyDivergenceAnalysis.cpp | 36
-rw-r--r--  lib/Analysis/Lint.cpp | 2
-rw-r--r--  lib/Analysis/Loads.cpp | 238
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 45
-rw-r--r--  lib/Analysis/LoopAnalysisManager.cpp | 2
-rw-r--r--  lib/Analysis/LoopCacheAnalysis.cpp | 625
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 39
-rw-r--r--  lib/Analysis/LoopUnrollAnalyzer.cpp | 2
-rw-r--r--  lib/Analysis/MemDerefPrinter.cpp | 4
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 51
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 21
-rw-r--r--  lib/Analysis/MemorySSA.cpp | 95
-rw-r--r--  lib/Analysis/MemorySSAUpdater.cpp | 323
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 16
-rw-r--r--  lib/Analysis/MustExecute.cpp | 118
-rw-r--r--  lib/Analysis/OptimizationRemarkEmitter.cpp | 4
-rw-r--r--  lib/Analysis/OrderedInstructions.cpp | 2
-rw-r--r--  lib/Analysis/ProfileSummaryInfo.cpp | 67
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 89
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 19
-rw-r--r--  lib/Analysis/StackSafetyAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/SyncDependenceAnalysis.cpp | 61
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 44
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 64
-rw-r--r--  lib/Analysis/TypeMetadataUtils.cpp | 32
-rw-r--r--  lib/Analysis/VFABIDemangling.cpp | 418
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 658
-rw-r--r--  lib/Analysis/VectorUtils.cpp | 20
53 files changed, 3559 insertions, 1026 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 32241e355eb8..55dd9a4cda08 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -784,7 +784,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
// previous object first, in this case replacing it with an empty one, before
// registering new results.
AAR.reset(
- new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+ new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)));
// BasicAA is always available for function analyses. Also, we add it first
// so that it can trump TBAA results when it proves MustAlias.
@@ -840,7 +840,7 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
BasicAAResult &BAR) {
- AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
// Add in our explicitly constructed BasicAA results.
if (!DisableBasicAA)
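The getTLI() interface now takes the Function so that TargetLibraryInfo can honor per-function attributes such as "no-builtins". A minimal sketch of the updated calling convention in a hypothetical legacy pass (MyPass is illustrative, not part of this patch):

    bool MyPass::runOnFunction(Function &F) {
      // getTLI(F) returns a TLI view specialized for F, so per-function
      // attributes are respected instead of one module-wide answer.
      const TargetLibraryInfo &TLI =
          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      return false; // analysis only; the IR is not modified
    }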
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index a6e5b9fab558..79fbcd464c1b 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -119,6 +119,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
TotalMayAliasSetSize -= AS->size();
AliasSets.erase(AS);
+  // If we've removed the saturated alias set, set the saturated marker back to
+ // nullptr and ensure this tracker is empty.
+ if (AS == AliasAnyAS) {
+ AliasAnyAS = nullptr;
+ assert(AliasSets.empty() && "Tracker not empty");
+ }
}
void AliasSet::removeFromTracker(AliasSetTracker &AST) {
@@ -690,8 +696,10 @@ void AliasSet::print(raw_ostream &OS) const {
}
void AliasSetTracker::print(raw_ostream &OS) const {
- OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for "
- << PointerMap.size() << " pointer values.\n";
+ OS << "Alias Set Tracker: " << AliasSets.size();
+ if (AliasAnyAS)
+ OS << " (Saturated)";
+ OS << " alias sets for " << PointerMap.size() << " pointer values.\n";
for (const AliasSet &AS : *this)
AS.print(OS);
OS << "\n";
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index d46a8d8e306c..af718526684b 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -65,6 +65,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeModuleDebugInfoPrinterPass(Registry);
initializeModuleSummaryIndexWrapperPassPass(Registry);
initializeMustExecutePrinterPass(Registry);
+ initializeMustBeExecutedContextPrinterPass(Registry);
initializeObjCARCAAWrapperPassPass(Registry);
initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
initializePhiValuesWrapperPassPass(Registry);
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index cf2f845dee0a..129944743c5e 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -130,7 +130,10 @@ void AssumptionCache::unregisterAssumption(CallInst *CI) {
if (AVI != AffectedValues.end())
AffectedValues.erase(AVI);
}
- remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; });
+
+ AssumeHandles.erase(
+ remove_if(AssumeHandles, [CI](WeakTrackingVH &VH) { return CI == VH; }),
+ AssumeHandles.end());
}
void AssumptionCache::AffectedValueCallbackVH::deleted() {
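The fix above completes the erase-remove idiom: remove_if only shifts the surviving elements forward and returns the new logical end, so without the enclosing erase() the container keeps its old size and stale handles linger in the tail. A standalone illustration in plain C++:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> V = {1, 2, 3, 2};
      // The bug shape: remove_if alone leaves V.size() == 4.
      auto NewEnd = std::remove_if(V.begin(), V.end(),
                                   [](int X) { return X == 2; });
      assert(V.size() == 4);    // stale tail still present
      V.erase(NewEnd, V.end()); // the missing half of the idiom
      assert(V.size() == 2);
      return 0;
    }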
@@ -140,7 +143,7 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() {
// 'this' now dangles!
}
-void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) {
+void AssumptionCache::transferAffectedValuesInCache(Value *OV, Value *NV) {
auto &NAVV = getOrInsertAffectedValues(NV);
auto AVI = AffectedValues.find(OV);
if (AVI == AffectedValues.end())
@@ -149,6 +152,7 @@ void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) {
for (auto &A : AVI->second)
if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
NAVV.push_back(A);
+ AffectedValues.erase(OV);
}
void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) {
@@ -157,7 +161,7 @@ void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) {
// Any assumptions that affected this value now affect the new value.
- AC->copyAffectedValuesInCache(getValPtr(), NV);
+ AC->transferAffectedValuesInCache(getValPtr(), NV);
// 'this' now might dangle! If the AffectedValues map was resized to add an
// entry for NV then this object might have been destroyed in favor of some
// copy in the grown map.
@@ -252,7 +256,7 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
// Ok, build a new cache by scanning the function, insert it and the value
// handle into our map, and return the newly populated cache.
auto IP = AssumptionCaches.insert(std::make_pair(
- FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F)));
+ FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F)));
assert(IP.second && "Scanning function already in the map?");
return *IP.first->second;
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 3721c99883b8..f3c30c258c19 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -233,6 +233,26 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
}
+/// Return the minimal extent from \p V to the end of the underlying object,
+/// assuming the result is used in an aliasing query. E.g., we use the query
+/// location size and the fact that null pointers cannot alias here.
+static uint64_t getMinimalExtentFrom(const Value &V,
+ const LocationSize &LocSize,
+ const DataLayout &DL,
+ bool NullIsValidLoc) {
+ // If we have dereferenceability information we know a lower bound for the
+ // extent as accesses for a lower offset would be valid. We need to exclude
+ // the "or null" part if null is a valid pointer.
+ bool CanBeNull;
+ uint64_t DerefBytes = V.getPointerDereferenceableBytes(DL, CanBeNull);
+ DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes;
+  // If queried with a precise location size, we assume that location size is
+  // accessed and thus must be valid.
+ if (LocSize.isPrecise())
+ DerefBytes = std::max(DerefBytes, LocSize.getValue());
+ return DerefBytes;
+}
+
/// Returns true if we can prove that the object specified by V has size Size.
static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
const TargetLibraryInfo &TLI, bool NullIsValidLoc) {
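A self-contained model of the clamp that getMinimalExtentFrom() computes, using stand-in types for LocationSize and the dereferenceability query (all names here are illustrative):

    #include <algorithm>
    #include <cstdint>

    struct Loc { bool Precise; uint64_t Size; };

    uint64_t minimalExtent(uint64_t DerefBytes, bool CanBeNull,
                           bool NullIsValidLoc, Loc LocSize) {
      // dereferenceable_or_null only guarantees bytes when null is not a
      // valid location in the enclosing function.
      if (CanBeNull && NullIsValidLoc)
        DerefBytes = 0;
      // A precise access of N bytes is assumed to execute, so N is also a
      // lower bound on the extent reachable from this pointer.
      if (LocSize.Precise)
        DerefBytes = std::max(DerefBytes, LocSize.Size);
      return DerefBytes;
    }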
@@ -481,7 +501,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
V = RP;
continue;
}
@@ -1792,10 +1812,12 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
bool NullIsValidLocation = NullPointerIsDefined(&F);
- if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI,
- NullIsValidLocation)) ||
- (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI,
- NullIsValidLocation)))
+ if ((isObjectSmallerThan(
+ O2, getMinimalExtentFrom(*V1, V1Size, DL, NullIsValidLocation), DL,
+ TLI, NullIsValidLocation)) ||
+ (isObjectSmallerThan(
+ O1, getMinimalExtentFrom(*V2, V2Size, DL, NullIsValidLocation), DL,
+ TLI, NullIsValidLocation)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
@@ -2053,8 +2075,9 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) {
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *PVWP = getAnalysisIfAvailable<PhiValuesWrapperPass>();
- Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F, TLIWP.getTLI(),
- ACT.getAssumptionCache(F), &DTWP.getDomTree(),
+ Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), F,
+ TLIWP.getTLI(F), ACT.getAssumptionCache(F),
+ &DTWP.getDomTree(),
LIWP ? &LIWP->getLoopInfo() : nullptr,
PVWP ? &PVWP->getResult() : nullptr));
@@ -2071,8 +2094,7 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
return BasicAAResult(
- F.getParent()->getDataLayout(),
- F,
- P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ F.getParent()->getDataLayout(), F,
+ P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 5eb95003f5d8..a06ee096d54c 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -118,6 +118,13 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+/// This is the probability for an ordered floating point comparison.
+static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
+/// This is the probability for an unordered floating point comparison; it means
+/// one or two of the operands are NaN. Usually it is used to test for an
+/// exceptional case, so the result is unlikely.
+static const uint32_t FPH_UNO_WEIGHT = 1;
+
/// Invoke-terminating normal branch taken weight
///
/// This is the weight for branching to the normal destination of an invoke
@@ -778,6 +785,8 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
if (!FCmp)
return false;
+ uint32_t TakenWeight = FPH_TAKEN_WEIGHT;
+ uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT;
bool isProb;
if (FCmp->isEquality()) {
// f1 == f2 -> Unlikely
@@ -786,9 +795,13 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
} else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
// !isnan -> Likely
isProb = true;
+ TakenWeight = FPH_ORD_WEIGHT;
+ NontakenWeight = FPH_UNO_WEIGHT;
} else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
// isnan -> Unlikely
isProb = false;
+ TakenWeight = FPH_ORD_WEIGHT;
+ NontakenWeight = FPH_UNO_WEIGHT;
} else {
return false;
}
@@ -798,8 +811,7 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
if (!isProb)
std::swap(TakenIdx, NonTakenIdx);
- BranchProbability TakenProb(FPH_TAKEN_WEIGHT,
- FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+ BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight);
setEdgeProbability(BB, TakenIdx, TakenProb);
setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());
return true;
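With the new weights, an ordered comparison (the !isnan test) is treated as overwhelmingly likely rather than merely 20:12 likely. A quick standalone check of the resulting edge probability:

    #include <cstdio>

    int main() {
      const unsigned OrdWeight = 1024 * 1024 - 1; // FPH_ORD_WEIGHT
      const unsigned UnoWeight = 1;               // FPH_UNO_WEIGHT
      // Taken probability of the ordered edge: ~0.99999905.
      std::printf("%.8f\n", double(OrdWeight) / (OrdWeight + UnoWeight));
      return 0;
    }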
@@ -1014,7 +1026,8 @@ void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
BPI.calculate(F, LI, &TLI);
return false;
}
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 18b83d6838cc..8215b4ecbb03 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -87,11 +87,18 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
/// with multiple predecessors.
bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges) {
- assert(TI->isTerminator() && "Must be a terminator to have successors!");
assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+ return isCriticalEdge(TI, TI->getSuccessor(SuccNum), AllowIdenticalEdges);
+}
+
+bool llvm::isCriticalEdge(const Instruction *TI, const BasicBlock *Dest,
+ bool AllowIdenticalEdges) {
+ assert(TI->isTerminator() && "Must be a terminator to have successors!");
if (TI->getNumSuccessors() == 1) return false;
- const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+ assert(find(predecessors(Dest), TI->getParent()) != pred_end(Dest) &&
+ "No edge between TI's block and Dest.");
+
const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
// If there is more than one predecessor, this is a critical edge...
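The new overload lets a caller ask about an edge by its destination block directly, without first recovering the successor index. A hypothetical call site (assumes BB and DestBB are BasicBlock pointers with an edge BB -> DestBB):

    const Instruction *TI = BB->getTerminator();
    if (isCriticalEdge(TI, DestBB))
      ++NumCriticalEdges; // illustrative counter, not part of this patch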
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 619b675b58d8..4f4103fefa25 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -99,7 +99,7 @@ static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
if (!EC)
WriteGraph(File, (const Function*)&F, CFGOnly);
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 690e514d4f5c..fd90bd1521d6 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -88,9 +88,11 @@ using namespace llvm::cflaa;
#define DEBUG_TYPE "cfl-anders-aa"
-CFLAndersAAResult::CFLAndersAAResult(const TargetLibraryInfo &TLI) : TLI(TLI) {}
+CFLAndersAAResult::CFLAndersAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : GetTLI(std::move(GetTLI)) {}
CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
- : AAResultBase(std::move(RHS)), TLI(RHS.TLI) {}
+ : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {}
CFLAndersAAResult::~CFLAndersAAResult() = default;
namespace {
@@ -779,7 +781,7 @@ static AliasAttrMap buildAttrMap(const CFLGraph &Graph,
CFLAndersAAResult::FunctionInfo
CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
CFLGraphBuilder<CFLAndersAAResult> GraphBuilder(
- *this, TLI,
+ *this, GetTLI(const_cast<Function &>(Fn)),
// Cast away the constness here due to GraphBuilder's API requirement
const_cast<Function &>(Fn));
auto &Graph = GraphBuilder.getCFLGraph();
@@ -898,7 +900,10 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
AnalysisKey CFLAndersAA::Key;
CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
- return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F));
+ auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & {
+ return AM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return CFLAndersAAResult(GetTLI);
}
char CFLAndersAAWrapperPass::ID = 0;
@@ -914,8 +919,10 @@ CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) {
}
void CFLAndersAAWrapperPass::initializePass() {
- auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
- Result.reset(new CFLAndersAAResult(TLIWP.getTLI()));
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ Result.reset(new CFLAndersAAResult(GetTLI));
}
void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index 44b1834f70bf..b87aa4065392 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -60,10 +60,11 @@ using namespace llvm::cflaa;
#define DEBUG_TYPE "cfl-steens-aa"
-CFLSteensAAResult::CFLSteensAAResult(const TargetLibraryInfo &TLI)
- : AAResultBase(), TLI(TLI) {}
+CFLSteensAAResult::CFLSteensAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : AAResultBase(), GetTLI(std::move(GetTLI)) {}
CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
- : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
+ : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
CFLSteensAAResult::~CFLSteensAAResult() = default;
/// Information we have about a function and would like to keep around.
@@ -181,7 +182,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
// Builds the graph + StratifiedSets for a function.
CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
- CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, TLI, *Fn);
+ CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
// Add all CFLGraph nodes and all Dereference edges to StratifiedSets
@@ -331,7 +332,10 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
AnalysisKey CFLSteensAA::Key;
CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
- return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
+ auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
+ return AM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return CFLSteensAAResult(GetTLI);
}
char CFLSteensAAWrapperPass::ID = 0;
@@ -347,8 +351,10 @@ CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
}
void CFLSteensAAWrapperPass::initializePass() {
- auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
- Result.reset(new CFLSteensAAResult(TLIWP.getTLI()));
+ auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+ Result.reset(new CFLSteensAAResult(GetTLI));
}
void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index ec5e94d499be..70aeb1a688ee 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
CallGraph::CallGraph(Module &M)
: M(M), ExternalCallingNode(getOrInsertFunction(nullptr)),
- CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {
+ CallsExternalNode(std::make_unique<CallGraphNode>(nullptr)) {
// Add every function to the call graph.
for (Function &F : M)
addToCallGraph(&F);
@@ -150,7 +150,7 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
return CGN.get();
assert((!F || F->getParent() == &M) && "Function not in current module!");
- CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F));
+ CGN = std::make_unique<CallGraphNode>(const_cast<Function *>(F));
return CGN.get();
}
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index adaa83a6c443..20e2f06540a3 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -33,6 +33,22 @@ CaptureTracker::~CaptureTracker() {}
bool CaptureTracker::shouldExplore(const Use *U) { return true; }
+bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
+ // An inbounds GEP can either be a valid pointer (pointing into
+ // or to the end of an allocation), or be null in the default
+ // address space. So for an inbounds GEP there is no way to let
+ // the pointer escape using clever GEP hacking because doing so
+ // would make the pointer point outside of the allocated object
+ // and thus make the GEP result a poison value. Similarly, other
+ // dereferenceable pointers cannot be manipulated without producing
+ // poison.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
+ if (GEP->isInBounds())
+ return true;
+ bool CanBeNull;
+ return O->getPointerDereferenceableBytes(DL, CanBeNull);
+}
+
namespace {
struct SimpleCaptureTracker : public CaptureTracker {
explicit SimpleCaptureTracker(bool ReturnCaptures)
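Hoisting the dereferenceability check into a CaptureTracker virtual lets individual trackers opt out of, or tighten, the null-comparison reasoning. A minimal hypothetical override on top of the existing pure-virtual interface:

    #include "llvm/Analysis/CaptureTracking.h"
    using namespace llvm;

    struct ConservativeTracker : public CaptureTracker {
      bool Captured = false;
      void tooManyUses() override { Captured = true; }
      bool captured(const Use *U) override {
        Captured = true;
        return true; // stop the walk at the first capturing use
      }
      bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
        // Opt out of the relaxed reasoning: treat comparisons against null
        // as potential captures regardless of dereferenceability.
        return false;
      }
    };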
@@ -251,7 +267,8 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
// marked with nocapture do not capture. This means that places like
// GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
// in BasicAA also need to know about this property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) {
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call,
+ true)) {
AddUses(Call);
break;
}
@@ -330,7 +347,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
AddUses(I);
break;
case Instruction::ICmp: {
- if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(1))) {
+ unsigned Idx = (I->getOperand(0) == V) ? 0 : 1;
+ unsigned OtherIdx = 1 - Idx;
+ if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
// Don't count comparisons of a no-alias return value against null as
// captures. This allows us to ignore comparisons of malloc results
// with null, for example.
@@ -338,29 +357,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
if (isNoAliasCall(V->stripPointerCasts()))
break;
if (!I->getFunction()->nullPointerIsDefined()) {
- auto *O = I->getOperand(0)->stripPointerCastsSameRepresentation();
- // An inbounds GEP can either be a valid pointer (pointing into
- // or to the end of an allocation), or be null in the default
- // address space. So for an inbounds GEPs there is no way to let
- // the pointer escape using clever GEP hacking because doing so
- // would make the pointer point outside of the allocated object
- // and thus make the GEP result a poison value.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
- if (GEP->isInBounds())
- break;
- // Comparing a dereferenceable_or_null argument against null
- // cannot lead to pointer escapes, because if it is not null it
- // must be a valid (in-bounds) pointer.
- bool CanBeNull;
- if (O->getPointerDereferenceableBytes(I->getModule()->getDataLayout(), CanBeNull))
+ auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
+ // Comparing a dereferenceable_or_null pointer against null cannot
+ // lead to pointer escapes, because if it is not null it must be a
+ // valid (in-bounds) pointer.
+ if (Tracker->isDereferenceableOrNull(O, I->getModule()->getDataLayout()))
break;
}
}
// Comparison against value stored in global variable. Given the pointer
// does not escape, its value cannot be guessed and stored separately in a
// global variable.
- unsigned OtherIndex = (I->getOperand(0) == V) ? 1 : 0;
- auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIndex));
+ auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
break;
// Otherwise, be conservative. There are crazy ways to capture pointers
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 20231ca78b45..8dbcf7034fda 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -93,6 +93,9 @@ static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
+ assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
+ "Invalid constantexpr bitcast!");
+
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
@@ -521,8 +524,23 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
return nullptr;
C = FoldBitCast(C, MapTy->getPointerTo(AS), DL);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL))
- return FoldBitCast(Res, LoadTy, DL);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) {
+ if (Res->isNullValue() && !LoadTy->isX86_MMXTy())
+ // Materializing a zero can be done trivially without a bitcast
+ return Constant::getNullValue(LoadTy);
+ Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
+ Res = FoldBitCast(Res, CastTy, DL);
+ if (LoadTy->isPtrOrPtrVectorTy()) {
+        // For a vector of pointers, we need to first convert to a vector of
+        // integers, then do a vector inttoptr.
+ if (Res->isNullValue() && !LoadTy->isX86_MMXTy())
+ return Constant::getNullValue(LoadTy);
+ if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+ // Be careful not to replace a load of an addrspace value with an inttoptr here
+ return nullptr;
+ Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
+ }
+ return Res;
+ }
return nullptr;
}
@@ -544,7 +562,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
int64_t InitializerSize = DL.getTypeAllocSize(GV->getInitializer()->getType());
// If we're not accessing anything in this constant, the result is undefined.
- if (Offset + BytesLoaded <= 0)
+ if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
return UndefValue::get(IntType);
// If we're not accessing anything in this constant, the result is undefined.
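The rewritten bounds check sidesteps a signed/unsigned trap: BytesLoaded is unsigned, so in the old form the addition promoted Offset to uint64_t, a negative offset wrapped around, and the <= 0 test was almost never true. A standalone demonstration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Offset = -16; // access lies entirely before the constant
      uint64_t BytesLoaded = 8;
      // Old form: -16 wraps to a huge unsigned value, so the test is false.
      std::printf("%d\n", Offset + BytesLoaded <= 0);                        // 0
      // New form: stays in signed arithmetic and detects the case.
      std::printf("%d\n", Offset <= -1 * static_cast<int64_t>(BytesLoaded)); // 1
      return 0;
    }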
@@ -781,10 +799,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
}
/// Strip the pointer casts, but preserve the address space information.
-Constant* StripPtrCastKeepAS(Constant* Ptr, Type *&ElemTy) {
+Constant *StripPtrCastKeepAS(Constant *Ptr, Type *&ElemTy) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
auto *OldPtrTy = cast<PointerType>(Ptr->getType());
- Ptr = Ptr->stripPointerCasts();
+ Ptr = cast<Constant>(Ptr->stripPointerCasts());
auto *NewPtrTy = cast<PointerType>(Ptr->getType());
ElemTy = NewPtrTy->getPointerElementType();
@@ -1038,7 +1056,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
case Instruction::ExtractValue:
return ConstantExpr::getExtractValue(
- Ops[0], dyn_cast<ExtractValueInst>(InstOrCE)->getIndices());
+ Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
@@ -1464,40 +1482,50 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
if (!F->hasName())
return false;
- StringRef Name = F->getName();
// In these cases, the check of the length is required. We don't want to
// return true for a name like "cos\0blah" which strcmp would return equal to
// "cos", but has length 8.
+ StringRef Name = F->getName();
switch (Name[0]) {
default:
return false;
case 'a':
- return Name == "acos" || Name == "asin" || Name == "atan" ||
- Name == "atan2" || Name == "acosf" || Name == "asinf" ||
- Name == "atanf" || Name == "atan2f";
+ return Name == "acos" || Name == "acosf" ||
+ Name == "asin" || Name == "asinf" ||
+ Name == "atan" || Name == "atanf" ||
+ Name == "atan2" || Name == "atan2f";
case 'c':
- return Name == "ceil" || Name == "cos" || Name == "cosh" ||
- Name == "ceilf" || Name == "cosf" || Name == "coshf";
+ return Name == "ceil" || Name == "ceilf" ||
+ Name == "cos" || Name == "cosf" ||
+ Name == "cosh" || Name == "coshf";
case 'e':
- return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f";
+ return Name == "exp" || Name == "expf" ||
+ Name == "exp2" || Name == "exp2f";
case 'f':
- return Name == "fabs" || Name == "floor" || Name == "fmod" ||
- Name == "fabsf" || Name == "floorf" || Name == "fmodf";
+ return Name == "fabs" || Name == "fabsf" ||
+ Name == "floor" || Name == "floorf" ||
+ Name == "fmod" || Name == "fmodf";
case 'l':
- return Name == "log" || Name == "log10" || Name == "logf" ||
- Name == "log10f";
+ return Name == "log" || Name == "logf" ||
+ Name == "log2" || Name == "log2f" ||
+ Name == "log10" || Name == "log10f";
+ case 'n':
+ return Name == "nearbyint" || Name == "nearbyintf";
case 'p':
return Name == "pow" || Name == "powf";
case 'r':
- return Name == "round" || Name == "roundf";
+ return Name == "rint" || Name == "rintf" ||
+ Name == "round" || Name == "roundf";
case 's':
- return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
- Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
+ return Name == "sin" || Name == "sinf" ||
+ Name == "sinh" || Name == "sinhf" ||
+ Name == "sqrt" || Name == "sqrtf";
case 't':
- return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";
+ return Name == "tan" || Name == "tanf" ||
+ Name == "tanh" || Name == "tanhf" ||
+ Name == "trunc" || Name == "truncf";
case '_':
-
// Check for various function names that get used for the math functions
// when the header files are preprocessed with the macro
// __FINITE_MATH_ONLY__ enabled.
@@ -1713,40 +1741,37 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
- if (IntrinsicID == Intrinsic::round) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToAway);
- return ConstantFP::get(Ty->getContext(), V);
+ // Use internal versions of these intrinsics.
+ APFloat U = Op->getValueAPF();
+
+ if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
+ U.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::floor) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardNegative);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::round) {
+ U.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::ceil) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardPositive);
- return ConstantFP::get(Ty->getContext(), V);
+ U.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::trunc) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmTowardZero);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::floor) {
+ U.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::rint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::trunc) {
+ U.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), U);
}
- if (IntrinsicID == Intrinsic::nearbyint) {
- APFloat V = Op->getValueAPF();
- V.roundToIntegral(APFloat::rmNearestTiesToEven);
- return ConstantFP::get(Ty->getContext(), V);
+ if (IntrinsicID == Intrinsic::fabs) {
+ U.clearSign();
+ return ConstantFP::get(Ty->getContext(), U);
}
/// We only fold functions with finite arguments. Folding NaN and inf is
@@ -1763,18 +1788,19 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
switch (IntrinsicID) {
default: break;
- case Intrinsic::fabs:
- return ConstantFoldFP(fabs, V, Ty);
- case Intrinsic::log2:
- return ConstantFoldFP(Log2, V, Ty);
case Intrinsic::log:
return ConstantFoldFP(log, V, Ty);
+ case Intrinsic::log2:
+ // TODO: What about hosts that lack a C99 library?
+ return ConstantFoldFP(Log2, V, Ty);
case Intrinsic::log10:
+ // TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(log10, V, Ty);
case Intrinsic::exp:
return ConstantFoldFP(exp, V, Ty);
case Intrinsic::exp2:
- return ConstantFoldFP(exp2, V, Ty);
+ // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
case Intrinsic::sin:
return ConstantFoldFP(sin, V, Ty);
case Intrinsic::cos:
@@ -1786,104 +1812,150 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
if (!TLI)
return nullptr;
- char NameKeyChar = Name[0];
- if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_')
- NameKeyChar = Name[2];
-
- switch (NameKeyChar) {
- case 'a':
- if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
- (Name == "acosf" && TLI->has(LibFunc_acosf)) ||
- (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) ||
- (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite)))
+ LibFunc Func = NotLibFunc;
+ TLI->getLibFunc(Name, Func);
+ switch (Func) {
+ default:
+ break;
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acos_finite:
+ case LibFunc_acosf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(acos, V, Ty);
- else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
- (Name == "asinf" && TLI->has(LibFunc_asinf)) ||
- (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) ||
- (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite)))
+ break;
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asin_finite:
+ case LibFunc_asinf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(asin, V, Ty);
- else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
- (Name == "atanf" && TLI->has(LibFunc_atanf)))
+ break;
+ case LibFunc_atan:
+ case LibFunc_atanf:
+ if (TLI->has(Func))
return ConstantFoldFP(atan, V, Ty);
break;
- case 'c':
- if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
- (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
- return ConstantFoldFP(ceil, V, Ty);
- else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
- (Name == "cosf" && TLI->has(LibFunc_cosf)))
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmTowardPositive);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+ break;
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ if (TLI->has(Func))
return ConstantFoldFP(cos, V, Ty);
- else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc_coshf)) ||
- (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) ||
- (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite)))
+ break;
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_cosh_finite:
+ case LibFunc_coshf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(cosh, V, Ty);
break;
- case 'e':
- if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
- (Name == "expf" && TLI->has(LibFunc_expf)) ||
- (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) ||
- (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite)))
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_exp_finite:
+ case LibFunc_expf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(exp, V, Ty);
- if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc_exp2f)) ||
- (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) ||
- (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite)))
- // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
- // C99 library.
+ break;
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2_finite:
+ case LibFunc_exp2f_finite:
+ if (TLI->has(Func))
+ // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
break;
- case 'f':
- if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
- (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
- return ConstantFoldFP(fabs, V, Ty);
- else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
- (Name == "floorf" && TLI->has(LibFunc_floorf)))
- return ConstantFoldFP(floor, V, Ty);
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ if (TLI->has(Func)) {
+ U.clearSign();
+ return ConstantFP::get(Ty->getContext(), U);
+ }
break;
- case 'l':
- if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) ||
- (Name == "__log_finite" && V > 0 &&
- TLI->has(LibFunc_log_finite)) ||
- (Name == "__logf_finite" && V > 0 &&
- TLI->has(LibFunc_logf_finite)))
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmTowardNegative);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+ break;
+ case LibFunc_log:
+ case LibFunc_logf:
+ case LibFunc_log_finite:
+ case LibFunc_logf_finite:
+ if (V > 0.0 && TLI->has(Func))
return ConstantFoldFP(log, V, Ty);
- else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) ||
- (Name == "__log10_finite" && V > 0 &&
- TLI->has(LibFunc_log10_finite)) ||
- (Name == "__log10f_finite" && V > 0 &&
- TLI->has(LibFunc_log10f_finite)))
+ break;
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2_finite:
+ case LibFunc_log2f_finite:
+ if (V > 0.0 && TLI->has(Func))
+ // TODO: What about hosts that lack a C99 library?
+ return ConstantFoldFP(Log2, V, Ty);
+ break;
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10_finite:
+ case LibFunc_log10f_finite:
+ if (V > 0.0 && TLI->has(Func))
+ // TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(log10, V, Ty);
break;
- case 'r':
- if ((Name == "round" && TLI->has(LibFunc_round)) ||
- (Name == "roundf" && TLI->has(LibFunc_roundf)))
- return ConstantFoldFP(round, V, Ty);
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
break;
- case 's':
- if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
- (Name == "sinf" && TLI->has(LibFunc_sinf)))
+ case LibFunc_round:
+ case LibFunc_roundf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmNearestTiesToAway);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+ break;
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ if (TLI->has(Func))
return ConstantFoldFP(sin, V, Ty);
- else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc_sinhf)) ||
- (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) ||
- (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite)))
+ break;
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinh_finite:
+ case LibFunc_sinhf_finite:
+ if (TLI->has(Func))
return ConstantFoldFP(sinh, V, Ty);
- else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
- (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
+ break;
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ if (V >= 0.0 && TLI->has(Func))
return ConstantFoldFP(sqrt, V, Ty);
break;
- case 't':
- if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
- (Name == "tanf" && TLI->has(LibFunc_tanf)))
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ if (TLI->has(Func))
return ConstantFoldFP(tan, V, Ty);
- else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
- (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
+ break;
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ if (TLI->has(Func))
return ConstantFoldFP(tanh, V, Ty);
break;
- default:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ if (TLI->has(Func)) {
+ U.roundToIntegral(APFloat::rmTowardZero);
+ return ConstantFP::get(Ty->getContext(), U);
+ }
break;
}
return nullptr;
@@ -2002,19 +2074,35 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (!TLI)
return nullptr;
- if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
- (Name == "powf" && TLI->has(LibFunc_powf)) ||
- (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) ||
- (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite)))
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
- (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc_atan2f)) ||
- (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) ||
- (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite)))
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+
+ LibFunc Func = NotLibFunc;
+ TLI->getLibFunc(Name, Func);
+ switch (Func) {
+ default:
+ break;
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_pow_finite:
+ case LibFunc_powf_finite:
+ if (TLI->has(Func))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ break;
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ if (TLI->has(Func)) {
+ APFloat V = Op1->getValueAPF();
+ if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
+ return ConstantFP::get(Ty->getContext(), V);
+ }
+ break;
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2_finite:
+ case LibFunc_atan2f_finite:
+ if (TLI->has(Func))
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+ break;
+ }
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
return ConstantFP::get(Ty->getContext(),
@@ -2041,20 +2129,27 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
switch (IntrinsicID) {
default: break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ // X - undef -> { undef, false }
+ // undef - X -> { undef, false }
+ // X + undef -> { undef, false }
+      // undef + X -> { undef, false }
+ if (!C0 || !C1) {
+ return ConstantStruct::get(
+ cast<StructType>(Ty),
+ {UndefValue::get(Ty->getStructElementType(0)),
+ Constant::getNullValue(Ty->getStructElementType(1))});
+ }
+ LLVM_FALLTHROUGH;
case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- // Even if both operands are undef, we cannot fold muls to undef
- // in the general case. For example, on i2 there are no inputs
- // that would produce { i2 -1, i1 true } as the result.
+ case Intrinsic::umul_with_overflow: {
+ // undef * X -> { 0, false }
+ // X * undef -> { 0, false }
if (!C0 || !C1)
return Constant::getNullValue(Ty);
- LLVM_FALLTHROUGH;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow: {
- if (!C0 || !C1)
- return UndefValue::get(Ty);
APInt Res;
bool Overflow;
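The deleted comment's claim still holds and is easy to verify: on i2 no input pair multiplies to the value -1 (0b11) together with an overflow, so a multiply with an undef operand cannot fold to { undef, true }, and { 0, false } is used instead. An exhaustive standalone check:

    #include <cstdio>

    int main() {
      // i2 umul.with.overflow: the value is the low 2 bits; overflow means
      // the full product exceeded the 2-bit range.
      bool Seen = false;
      for (unsigned A = 0; A < 4; ++A)
        for (unsigned B = 0; B < 4; ++B) {
          unsigned Full = A * B;
          if ((Full & 3) == 3 && Full > 3)
            Seen = true; // would be { i2 -1, i1 true }
        }
      std::printf("{-1, true} reachable: %s\n", Seen ? "yes" : "no"); // no
      return 0;
    }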
@@ -2194,13 +2289,9 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
case Intrinsic::fma:
case Intrinsic::fmuladd: {
APFloat V = Op1->getValueAPF();
- APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(),
- Op3->getValueAPF(),
- APFloat::rmNearestTiesToEven);
- if (s != APFloat::opInvalidOp)
- return ConstantFP::get(Ty->getContext(), V);
-
- return nullptr;
+ V.fusedMultiplyAdd(Op2->getValueAPF(), Op3->getValueAPF(),
+ APFloat::rmNearestTiesToEven);
+ return ConstantFP::get(Ty->getContext(), V);
}
}
}
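fmod is now folded through APFloat::mod rather than the host's libm, which keeps results host-independent. A small sketch against LLVM's APFloat (link LLVMSupport); the status check mirrors the folding above:

    #include "llvm/ADT/APFloat.h"
    #include <cstdio>

    int main() {
      llvm::APFloat X(5.5), Y(2.0);
      // mod() mutates X in place and reports a status, as the fold checks.
      if (X.mod(Y) == llvm::APFloat::opOK)
        std::printf("%f\n", X.convertToDouble()); // 1.5
      return 0;
    }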
diff --git a/lib/Analysis/DDG.cpp b/lib/Analysis/DDG.cpp
new file mode 100644
index 000000000000..b5c3c761ad98
--- /dev/null
+++ b/lib/Analysis/DDG.cpp
@@ -0,0 +1,203 @@
+//===- DDG.cpp - Data Dependence Graph -------------------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation for the data dependence graph.
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/DDG.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ddg"
+
+template class llvm::DGEdge<DDGNode, DDGEdge>;
+template class llvm::DGNode<DDGNode, DDGEdge>;
+template class llvm::DirectedGraph<DDGNode, DDGEdge>;
+
+//===--------------------------------------------------------------------===//
+// DDGNode implementation
+//===--------------------------------------------------------------------===//
+DDGNode::~DDGNode() {}
+
+bool DDGNode::collectInstructions(
+ llvm::function_ref<bool(Instruction *)> const &Pred,
+ InstructionListType &IList) const {
+ assert(IList.empty() && "Expected the IList to be empty on entry.");
+ if (isa<SimpleDDGNode>(this)) {
+ for (auto *I : cast<const SimpleDDGNode>(this)->getInstructions())
+ if (Pred(I))
+ IList.push_back(I);
+ } else
+ llvm_unreachable("unimplemented type of node");
+ return !IList.empty();
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) {
+ const char *Out;
+ switch (K) {
+ case DDGNode::NodeKind::SingleInstruction:
+ Out = "single-instruction";
+ break;
+ case DDGNode::NodeKind::MultiInstruction:
+ Out = "multi-instruction";
+ break;
+ case DDGNode::NodeKind::Root:
+ Out = "root";
+ break;
+ case DDGNode::NodeKind::Unknown:
+ Out = "??";
+ break;
+ }
+ OS << Out;
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) {
+ OS << "Node Address:" << &N << ":" << N.getKind() << "\n";
+ if (isa<SimpleDDGNode>(N)) {
+ OS << " Instructions:\n";
+ for (auto *I : cast<const SimpleDDGNode>(N).getInstructions())
+ OS.indent(2) << *I << "\n";
+ } else if (!isa<RootDDGNode>(N))
+ llvm_unreachable("unimplemented type of node");
+
+ OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n");
+ for (auto &E : N.getEdges())
+ OS.indent(2) << *E;
+ return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// SimpleDDGNode implementation
+//===--------------------------------------------------------------------===//
+
+SimpleDDGNode::SimpleDDGNode(Instruction &I)
+ : DDGNode(NodeKind::SingleInstruction), InstList() {
+ assert(InstList.empty() && "Expected empty list.");
+ InstList.push_back(&I);
+}
+
+SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N)
+ : DDGNode(N), InstList(N.InstList) {
+ assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) ||
+ (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) &&
+ "constructing from invalid simple node.");
+}
+
+SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N)
+ : DDGNode(std::move(N)), InstList(std::move(N.InstList)) {
+ assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) ||
+ (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) &&
+ "constructing from invalid simple node.");
+}
+
+SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); }
+
+//===--------------------------------------------------------------------===//
+// DDGEdge implementation
+//===--------------------------------------------------------------------===//
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) {
+ const char *Out;
+ switch (K) {
+ case DDGEdge::EdgeKind::RegisterDefUse:
+ Out = "def-use";
+ break;
+ case DDGEdge::EdgeKind::MemoryDependence:
+ Out = "memory";
+ break;
+ case DDGEdge::EdgeKind::Rooted:
+ Out = "rooted";
+ break;
+ case DDGEdge::EdgeKind::Unknown:
+ Out = "??";
+ break;
+ }
+ OS << Out;
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) {
+ OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n";
+ return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// DataDependenceGraph implementation
+//===--------------------------------------------------------------------===//
+using BasicBlockListType = SmallVector<BasicBlock *, 8>;
+
+DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
+ : DependenceGraphInfo(F.getName().str(), D) {
+ BasicBlockListType BBList;
+ for (auto &BB : F.getBasicBlockList())
+ BBList.push_back(&BB);
+ DDGBuilder(*this, D, BBList).populate();
+}
+
+DataDependenceGraph::DataDependenceGraph(const Loop &L, DependenceInfo &D)
+ : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
+ L.getHeader()->getName())
+ .str(),
+ D) {
+ BasicBlockListType BBList;
+ for (BasicBlock *BB : L.blocks())
+ BBList.push_back(BB);
+ DDGBuilder(*this, D, BBList).populate();
+}
+
+DataDependenceGraph::~DataDependenceGraph() {
+ for (auto *N : Nodes) {
+ for (auto *E : *N)
+ delete E;
+ delete N;
+ }
+}
+
+bool DataDependenceGraph::addNode(DDGNode &N) {
+ if (!DDGBase::addNode(N))
+ return false;
+
+ // In general, if the root node is already created and linked, it is not safe
+ // to add new nodes since they may be unreachable by the root.
+ // TODO: Allow adding Pi-block nodes after root is created. Pi-blocks are an
+ // exception because they represent components that are already reachable by
+ // root.
+ assert(!Root && "Root node is already added. No more nodes can be added.");
+ if (isa<RootDDGNode>(N))
+ Root = &N;
+
+ return true;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) {
+ for (auto *Node : G)
+ OS << *Node << "\n";
+ return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// DDG as a loop pass.
+DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR) {
+ Function *F = L.getHeader()->getParent();
+ DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
+ return std::make_unique<DataDependenceGraph>(L, DI);
+}
+AnalysisKey DDGAnalysis::Key;
+
+PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n";
+ OS << *AM.getResult<DDGAnalysis>(L, AR);
+ return PreservedAnalyses::all();
+}
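A hypothetical consumer of the new analysis, mirroring the printer pass above: request the graph for a loop and walk its nodes and edges (MyLoopPass is illustrative; assumes the DDG.h interface from this patch):

    PreservedAnalyses MyLoopPass::run(Loop &L, LoopAnalysisManager &AM,
                                      LoopStandardAnalysisResults &AR,
                                      LPMUpdater &U) {
      // DDGAnalysis::Result is a std::unique_ptr<DataDependenceGraph>.
      DataDependenceGraph &G = *AM.getResult<DDGAnalysis>(L, AR);
      for (DDGNode *N : G)     // includes the synthetic root node
        for (DDGEdge *E : *N)  // outgoing edges; kind via E->getKind()
          (void)E;
      return PreservedAnalyses::all();
    }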
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 75f269e84f9d..0038c9fb9ce4 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -254,7 +254,7 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
if (CommonLevels)
- DV = make_unique<DVEntry[]>(CommonLevels);
+ DV = std::make_unique<DVEntry[]>(CommonLevels);
}
// The rest are simple getters that hide the implementation.
@@ -3415,7 +3415,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
- return make_unique<Dependence>(Src, Dst);
+ return std::make_unique<Dependence>(Src, Dst);
}
assert(isLoadOrStore(Src) && "instruction is not load or store");
@@ -3430,7 +3430,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
case PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
- return make_unique<Dependence>(Src, Dst);
+ return std::make_unique<Dependence>(Src, Dst);
case NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
LLVM_DEBUG(dbgs() << "no alias\n");
@@ -3777,7 +3777,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
return nullptr;
}
- return make_unique<FullDependence>(std::move(Result));
+ return std::make_unique<FullDependence>(std::move(Result));
}
diff --git a/lib/Analysis/DependenceGraphBuilder.cpp b/lib/Analysis/DependenceGraphBuilder.cpp
new file mode 100644
index 000000000000..ed1d8351b2f0
--- /dev/null
+++ b/lib/Analysis/DependenceGraphBuilder.cpp
@@ -0,0 +1,228 @@
+//===- DependenceGraphBuilder.cpp ------------------------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements common steps of the build algorithm for construction
+// of dependence graphs such as DDG and PDG.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DDG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dgb"
+
+STATISTIC(TotalGraphs, "Number of dependence graphs created.");
+STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
+STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
+STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
+STATISTIC(TotalConfusedEdges,
+ "Number of confused memory dependencies between two nodes.");
+STATISTIC(TotalEdgeReversals,
+ "Number of times the source and sink of dependence was reversed to "
+ "expose cycles in the graph.");
+
+using InstructionListType = SmallVector<Instruction *, 2>;
+
+//===--------------------------------------------------------------------===//
+// AbstractDependenceGraphBuilder implementation
+//===--------------------------------------------------------------------===//
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
+ ++TotalGraphs;
+ assert(IMap.empty() && "Expected empty instruction map at start");
+ for (BasicBlock *BB : BBList)
+ for (Instruction &I : *BB) {
+ auto &NewNode = createFineGrainedNode(I);
+ IMap.insert(std::make_pair(&I, &NewNode));
+ ++TotalFineGrainedNodes;
+ }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
+ // Create a root node that connects to every connected component of the graph.
+ // This is done to allow graph iterators to visit all the disjoint components
+  // of the graph in a single walk.
+ //
+ // This algorithm works by going through each node of the graph and for each
+ // node N, do a DFS starting from N. A rooted edge is established between the
+ // root node and N (if N is not yet visited). All the nodes reachable from N
+ // are marked as visited and are skipped in the DFS of subsequent nodes.
+ //
+ // Note: This algorithm tries to limit the number of edges out of the root
+ // node to some extent, but there may be redundant edges created depending on
+  // the iteration order. For example, for a graph {A -> B}, an edge from the
+  // root node is added to both nodes if B is visited before A. While it does
+  // not result in a minimal number of edges, this approach saves compile-time
+ // while keeping the number of edges in check.
+ auto &RootNode = createRootNode();
+ df_iterator_default_set<const NodeType *, 4> Visited;
+ for (auto *N : Graph) {
+ if (*N == RootNode)
+ continue;
+ for (auto I : depth_first_ext(N, Visited))
+ if (I == N)
+ createRootedEdge(RootNode, *N);
+ }
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
+ for (NodeType *N : Graph) {
+ InstructionListType SrcIList;
+ N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
+
+ // Use a set to mark the targets that we link to N, so we don't add
+ // duplicate def-use edges when more than one instruction in a target node
+ // use results of instructions that are contained in N.
+ SmallPtrSet<NodeType *, 4> VisitedTargets;
+
+ for (Instruction *II : SrcIList) {
+ for (User *U : II->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ if (!UI)
+ continue;
+ NodeType *DstNode = nullptr;
+ if (IMap.find(UI) != IMap.end())
+ DstNode = IMap.find(UI)->second;
+
+ // In the case of loops, the scope of the subgraph is all the
+ // basic blocks (and instructions within them) belonging to the loop. We
+ // simply ignore all the edges coming from (or going into) instructions
+ // or basic blocks outside of this range.
+ if (!DstNode) {
+ LLVM_DEBUG(
+ dbgs()
+ << "skipped def-use edge since the sink" << *UI
+ << " is outside the range of instructions being considered.\n");
+ continue;
+ }
+
+ // Self dependencies are ignored because they are redundant and
+ // uninteresting.
+ if (DstNode == N) {
+ LLVM_DEBUG(dbgs()
+ << "skipped def-use edge since the sink and the source ("
+ << N << ") are the same.\n");
+ continue;
+ }
+
+ if (VisitedTargets.insert(DstNode).second) {
+ createDefUseEdge(*N, *DstNode);
+ ++TotalDefUseEdges;
+ }
+ }
+ }
+ }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
+ using DGIterator = typename G::iterator;
+ auto isMemoryAccess = [](const Instruction *I) {
+ return I->mayReadOrWriteMemory();
+ };
+ for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
+ InstructionListType SrcIList;
+ (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
+ if (SrcIList.empty())
+ continue;
+
+ for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
+ if (**SrcIt == **DstIt)
+ continue;
+ InstructionListType DstIList;
+ (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
+ if (DstIList.empty())
+ continue;
+ bool ForwardEdgeCreated = false;
+ bool BackwardEdgeCreated = false;
+ for (Instruction *ISrc : SrcIList) {
+ for (Instruction *IDst : DstIList) {
+ auto D = DI.depends(ISrc, IDst, true);
+ if (!D)
+ continue;
+
+ // If we have a dependence with its left-most non-'=' direction
+          // being '>', we need to reverse the direction of the edge, because
+ // the source of the dependence cannot occur after the sink. For
+ // confused dependencies, we will create edges in both directions to
+ // represent the possibility of a cycle.
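+          // For example, a direction vector of (=, >) has '>' as its
+          // left-most non-'=' entry, meaning the sink iteration precedes the
+          // source iteration at that level, so the edge below is created
+          // from the sink node to the source node instead.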
+
+ auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
+ if (!ForwardEdgeCreated) {
+ createMemoryEdge(Src, Dst);
+ ++TotalMemoryEdges;
+ }
+ if (!BackwardEdgeCreated) {
+ createMemoryEdge(Dst, Src);
+ ++TotalMemoryEdges;
+ }
+ ForwardEdgeCreated = BackwardEdgeCreated = true;
+ ++TotalConfusedEdges;
+ };
+
+ auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
+ if (!ForwardEdgeCreated) {
+ createMemoryEdge(Src, Dst);
+ ++TotalMemoryEdges;
+ }
+ ForwardEdgeCreated = true;
+ };
+
+ auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
+ if (!BackwardEdgeCreated) {
+ createMemoryEdge(Dst, Src);
+ ++TotalMemoryEdges;
+ }
+ BackwardEdgeCreated = true;
+ };
+
+ if (D->isConfused())
+ createConfusedEdges(**SrcIt, **DstIt);
+ else if (D->isOrdered() && !D->isLoopIndependent()) {
+ bool ReversedEdge = false;
+ for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
+ if (D->getDirection(Level) == Dependence::DVEntry::EQ)
+ continue;
+ else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
+ createBackwardEdge(**SrcIt, **DstIt);
+ ReversedEdge = true;
+ ++TotalEdgeReversals;
+ break;
+ } else if (D->getDirection(Level) == Dependence::DVEntry::LT)
+ break;
+ else {
+ createConfusedEdges(**SrcIt, **DstIt);
+ break;
+ }
+ }
+ if (!ReversedEdge)
+ createForwardEdge(**SrcIt, **DstIt);
+ } else
+ createForwardEdge(**SrcIt, **DstIt);
+
+ // Avoid creating duplicate edges.
+ if (ForwardEdgeCreated && BackwardEdgeCreated)
+ break;
+ }
+
+      // If we've created edges in both directions, there are no further
+      // unique edges that we can create between these two nodes, so we
+      // can exit early.
+ if (ForwardEdgeCreated && BackwardEdgeCreated)
+ break;
+ }
+ }
+ }
+}
+
+template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
+template class llvm::DependenceGraphInfo<DDGNode>;
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index 0ccd59ef2bfd..3d1be1e1cce0 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -412,6 +412,12 @@ bool DivergenceAnalysis::isDivergent(const Value &V) const {
return DivergentValues.find(&V) != DivergentValues.end();
}
+bool DivergenceAnalysis::isDivergentUse(const Use &U) const {
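+  // A use can be divergent even when the used value is uniform, e.g. when a
+  // value that is uniform within a divergent loop is used outside the loop
+  // (temporal divergence).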
+ Value &V = *U.get();
+ Instruction &I = *cast<Instruction>(U.getUser());
+ return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
+}
+
void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
@@ -449,6 +455,10 @@ bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
return DA.isDivergent(val);
}
+bool GPUDivergenceAnalysis::isDivergentUse(const Use &use) const {
+ return DA.isDivergentUse(use);
+}
+
void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
DA.print(OS, mod);
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 0d6c0ffb18a8..efdf9706ba3c 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -370,7 +370,8 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
// passing into the function.
if (Call->isDataOperand(&U)) {
// Detect calls to free.
- if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Call->isArgOperand(&U) &&
+ isFreeCall(I, &GetTLI(*Call->getFunction()))) {
if (Writers)
Writers->insert(Call->getParent()->getParent());
} else {
@@ -432,7 +433,7 @@ bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
GV->getParent()->getDataLayout());
- if (!isAllocLikeFn(Ptr, &TLI))
+ if (!isAllocLikeFn(Ptr, &GetTLI(*SI->getFunction())))
return false; // Too hard to analyze.
// Analyze all uses of the allocation. If any of them are used in a
@@ -576,6 +577,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We handle calls specially because the graph-relevant aspects are
// handled above.
if (auto *Call = dyn_cast<CallBase>(&I)) {
+ auto &TLI = GetTLI(*Node->getFunction());
if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
@@ -937,12 +939,13 @@ ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc, AAQI));
}
-GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
- const TargetLibraryInfo &TLI)
- : AAResultBase(), DL(DL), TLI(TLI) {}
+GlobalsAAResult::GlobalsAAResult(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
+ : AAResultBase(), DL(DL), GetTLI(std::move(GetTLI)) {}
GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
- : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI),
+ : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)),
NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
IndirectGlobals(std::move(Arg.IndirectGlobals)),
AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
@@ -957,10 +960,10 @@ GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
GlobalsAAResult::~GlobalsAAResult() {}
-/*static*/ GlobalsAAResult
-GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
- CallGraph &CG) {
- GlobalsAAResult Result(M.getDataLayout(), TLI);
+/*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule(
+ Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
+ CallGraph &CG) {
+ GlobalsAAResult Result(M.getDataLayout(), GetTLI);
// Discover which functions aren't recursive, to feed into AnalyzeGlobals.
Result.CollectSCCMembership(CG);
@@ -977,8 +980,12 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
AnalysisKey GlobalsAA::Key;
GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
- return GlobalsAAResult::analyzeModule(M,
- AM.getResult<TargetLibraryAnalysis>(M),
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return GlobalsAAResult::analyzeModule(M, GetTLI,
AM.getResult<CallGraphAnalysis>(M));
}
@@ -999,9 +1006,11 @@ GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
}
bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+ auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
- M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
- getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+ M, GetTLI, getAnalysis<CallGraphWrapperPass>().getCallGraph())));
return false;
}
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
index ce285f82f720..6fb600114bc6 100644
--- a/lib/Analysis/IVDescriptors.cpp
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -300,7 +300,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
if (!ReduxDesc.isRecurrence())
return false;
- if (isa<FPMathOperator>(ReduxDesc.getPatternInst()))
+ // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
+ if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi)
FMF &= ReduxDesc.getPatternInst()->getFastMathFlags();
}
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 6ff840efcb64..68153de8219f 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -53,7 +53,7 @@ static cl::opt<unsigned>
"call callsite"));
ICallPromotionAnalysis::ICallPromotionAnalysis() {
- ValueDataArray = llvm::make_unique<InstrProfValueData[]>(MaxNumPromotions);
+ ValueDataArray = std::make_unique<InstrProfValueData[]>(MaxNumPromotions);
}
bool ICallPromotionAnalysis::isPromotionProfitable(uint64_t Count,
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 0dec146e0465..89811ec0e377 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -436,7 +436,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(
- AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
+ AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getFixedSize(),
+ AllocatedSize);
return Base::visitAlloca(I);
}
}
@@ -444,7 +445,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
Type *Ty = I.getAllocatedType();
- AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
+ AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getFixedSize(),
+ AllocatedSize);
}
// We will happily inline static alloca instructions.
@@ -1070,8 +1072,8 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *SimpleV = nullptr;
if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
- CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
+ SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
+ CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
else
SimpleV =
SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);
@@ -1453,19 +1455,6 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// Maximum valid cost increased in this function.
int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
- // Exit early for a large switch, assuming one case needs at least one
- // instruction.
- // FIXME: This is not true for a bit test, but ignore such case for now to
- // save compile-time.
- int64_t CostLowerBound =
- std::min((int64_t)CostUpperBound,
- (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
-
- if (CostLowerBound > Threshold && !ComputeFullInlineCost) {
- addCost((int64_t)SI.getNumCases() * InlineConstants::InstrCost);
- return false;
- }
-
unsigned JumpTableSize = 0;
unsigned NumCaseCluster =
TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index e34bf6f4e43f..cb8987721700 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -56,8 +56,8 @@ static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
const SimplifyQuery &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
-static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
- const SimplifyQuery &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+ const SimplifyQuery &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -1371,7 +1371,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
- ICmpInst *UnsignedICmp, bool IsAnd) {
+ ICmpInst *UnsignedICmp, bool IsAnd,
+ const SimplifyQuery &Q) {
Value *X, *Y;
ICmpInst::Predicate EqPred;
@@ -1380,6 +1381,59 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
ICmpInst::Predicate UnsignedPred;
+
+ Value *A, *B;
+ // Y = (A - B);
+ if (match(Y, m_Sub(m_Value(A), m_Value(B)))) {
+ if (match(UnsignedICmp,
+ m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) &&
+ ICmpInst::isUnsigned(UnsignedPred)) {
+ if (UnsignedICmp->getOperand(0) != A)
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+ // A >=/<= B || (A - B) != 0 <--> true
+ if ((UnsignedPred == ICmpInst::ICMP_UGE ||
+ UnsignedPred == ICmpInst::ICMP_ULE) &&
+ EqPred == ICmpInst::ICMP_NE && !IsAnd)
+ return ConstantInt::getTrue(UnsignedICmp->getType());
+ // A </> B && (A - B) == 0 <--> false
+ if ((UnsignedPred == ICmpInst::ICMP_ULT ||
+ UnsignedPred == ICmpInst::ICMP_UGT) &&
+ EqPred == ICmpInst::ICMP_EQ && IsAnd)
+ return ConstantInt::getFalse(UnsignedICmp->getType());
+
+ // A </> B && (A - B) != 0 <--> A </> B
+ // A </> B || (A - B) != 0 <--> (A - B) != 0
+ if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT ||
+ UnsignedPred == ICmpInst::ICMP_UGT))
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // A <=/>= B && (A - B) == 0 <--> (A - B) == 0
+ // A <=/>= B || (A - B) == 0 <--> A <=/>= B
+ if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE ||
+ UnsignedPred == ICmpInst::ICMP_UGE))
+ return IsAnd ? ZeroICmp : UnsignedICmp;
+ }
+
+ // Given Y = (A - B)
+ // Y >= A && Y != 0 --> Y >= A iff B != 0
+ // Y < A || Y == 0 --> Y < A iff B != 0
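+    // (With B known non-zero, Y u>= A can only hold if the subtraction
+    // wrapped, i.e. A u< B, in which case Y = A - B cannot be zero, making
+    // the second check redundant.)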
+ if (match(UnsignedICmp,
+ m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) {
+ if (UnsignedICmp->getOperand(0) != Y)
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+
+ if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd &&
+ EqPred == ICmpInst::ICMP_NE &&
+ isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return UnsignedICmp;
+ if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd &&
+ EqPred == ICmpInst::ICMP_EQ &&
+ isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return UnsignedICmp;
+ }
+ }
+
if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
ICmpInst::isUnsigned(UnsignedPred))
;
@@ -1395,19 +1449,33 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
return IsAnd ? UnsignedICmp : ZeroICmp;
- // X >= Y || Y != 0 --> true
+ // X <= Y && Y != 0 --> X <= Y iff X != 0
+ // X <= Y || Y != 0 --> Y != 0 iff X != 0
+ if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
+ isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // X >= Y && Y == 0 --> Y == 0
// X >= Y || Y == 0 --> X >= Y
- if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
- if (EqPred == ICmpInst::ICMP_NE)
- return getTrue(UnsignedICmp->getType());
- return UnsignedICmp;
- }
+ if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ)
+ return IsAnd ? ZeroICmp : UnsignedICmp;
+
+ // X > Y && Y == 0 --> Y == 0 iff X != 0
+ // X > Y || Y == 0 --> X > Y iff X != 0
+ if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
+ isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return IsAnd ? ZeroICmp : UnsignedICmp;
// X < Y && Y == 0 --> false
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
IsAnd)
return getFalse(UnsignedICmp->getType());
+ // X >= Y || Y != 0 --> true
+ if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE &&
+ !IsAnd)
+ return getTrue(UnsignedICmp->getType());
+
return nullptr;
}
@@ -1587,10 +1655,10 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
}
static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
- const InstrInfoQuery &IIQ) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ const SimplifyQuery &Q) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q))
return X;
- if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q))
return X;
if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
@@ -1604,9 +1672,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ))
return X;
return nullptr;
@@ -1660,10 +1728,10 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
}
static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
- const InstrInfoQuery &IIQ) {
- if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ const SimplifyQuery &Q) {
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q))
return X;
- if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
+ if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q))
return X;
if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
@@ -1677,9 +1745,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ))
return X;
return nullptr;
@@ -1738,8 +1806,8 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
if (ICmp0 && ICmp1)
- V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
- : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
+ V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q)
+ : simplifyOrOfICmps(ICmp0, ICmp1, Q);
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
@@ -1759,6 +1827,77 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
return nullptr;
}
+/// Check that Op1 is in the expected form, i.e.:
+/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+static bool omitCheckForZeroBeforeMulWithOverflowInternal(Value *Op1,
+ Value *X) {
+ auto *Extract = dyn_cast<ExtractValueInst>(Op1);
+ // We should only be extracting the overflow bit.
+ if (!Extract || !Extract->getIndices().equals(1))
+ return false;
+ Value *Agg = Extract->getAggregateOperand();
+ // This should be a multiplication-with-overflow intrinsic.
+ if (!match(Agg, m_CombineOr(m_Intrinsic<Intrinsic::umul_with_overflow>(),
+ m_Intrinsic<Intrinsic::smul_with_overflow>())))
+ return false;
+ // One of its multipliers should be the value we checked for zero before.
+ if (!match(Agg, m_CombineOr(m_Argument<0>(m_Specific(X)),
+ m_Argument<1>(m_Specific(X)))))
+ return false;
+ return true;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+/// %Op0 = icmp ne i4 %X, 0
+/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+/// %??? = and i1 %Op0, %Op1
+/// We can just return %Op1
+static Value *omitCheckForZeroBeforeMulWithOverflow(Value *Op0, Value *Op1) {
+ ICmpInst::Predicate Pred;
+ Value *X;
+ if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+ Pred != ICmpInst::Predicate::ICMP_NE)
+ return nullptr;
+ // Is Op1 in expected form?
+ if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+ return nullptr;
+ // Can omit 'and', and just return the overflow bit.
+ return Op1;
+}
+
+/// The @llvm.[us]mul.with.overflow intrinsic could have been folded from some
+/// other form of check, e.g. one that was using division; it may have been
+/// guarded against division-by-zero. We can drop that check now.
+/// Look for:
+/// %Op0 = icmp eq i4 %X, 0
+/// %Agg = tail call { i4, i1 } @llvm.[us]mul.with.overflow.i4(i4 %X, i4 %???)
+/// %Op1 = extractvalue { i4, i1 } %Agg, 1
+/// %NotOp1 = xor i1 %Op1, true
+/// %or = or i1 %Op0, %NotOp1
+/// We can just return %NotOp1
+static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
+ Value *NotOp1) {
+ ICmpInst::Predicate Pred;
+ Value *X;
+ if (!match(Op0, m_ICmp(Pred, m_Value(X), m_Zero())) ||
+ Pred != ICmpInst::Predicate::ICMP_EQ)
+ return nullptr;
+  // We expect the other operand of the 'or' to be a 'not'.
+ Value *Op1;
+ if (!match(NotOp1, m_Not(m_Value(Op1))))
+ return nullptr;
+ // Is Op1 in expected form?
+ if (!omitCheckForZeroBeforeMulWithOverflowInternal(Op1, X))
+ return nullptr;
+  // Can omit 'or', and just return the inverted overflow bit.
+ return NotOp1;
+}
+
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -1813,6 +1952,14 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op0;
}
+ // If we have a multiplication overflow check that is being 'and'ed with a
+ // check that one of the multipliers is not zero, we can omit the 'and', and
+ // only keep the overflow check.
+ if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op0, Op1))
+ return V;
+ if (Value *V = omitCheckForZeroBeforeMulWithOverflow(Op1, Op0))
+ return V;
+
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
@@ -1987,6 +2134,14 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
+  // If we have an inverted multiplication overflow check that is being 'or'ed
+  // with a check that one of the multipliers is zero, we can omit the 'or',
+  // and only keep the inverted overflow check.
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op0, Op1))
+ return V;
+ if (Value *V = omitCheckForZeroBeforeInvertedMulWithOverflow(Op1, Op0))
+ return V;
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
@@ -3529,6 +3684,9 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// %sel = select i1 %cmp, i32 -2147483648, i32 %add
//
// We can't replace %sel with %add unless we strip away the flags.
+ // TODO: This is an unusual limitation because better analysis results in
+ // worse simplification. InstCombine can do this fold more generally
+ // by dropping the flags. Remove this fold to save compile-time?
if (isa<OverflowingBinaryOperator>(B))
if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
return nullptr;
@@ -4324,14 +4482,16 @@ static Constant *propagateNaN(Constant *In) {
return In;
}
-static Constant *simplifyFPBinop(Value *Op0, Value *Op1) {
- if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
- return ConstantFP::getNaN(Op0->getType());
+/// Perform folds that are common to any floating-point operation. This
+/// implies transforms based on undef/NaN operands, because in those cases the
+/// operation itself makes no difference to the result.
+static Constant *simplifyFPOp(ArrayRef<Value *> Ops) {
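+  // An undef operand may be chosen to be NaN, and every FP operation
+  // propagates NaN, so folding the whole operation to NaN is valid.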
+ if (any_of(Ops, [](Value *V) { return isa<UndefValue>(V); }))
+ return ConstantFP::getNaN(Ops[0]->getType());
- if (match(Op0, m_NaN()))
- return propagateNaN(cast<Constant>(Op0));
- if (match(Op1, m_NaN()))
- return propagateNaN(cast<Constant>(Op1));
+ for (Value *V : Ops)
+ if (match(V, m_NaN()))
+ return propagateNaN(cast<Constant>(V));
return nullptr;
}
@@ -4343,7 +4503,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// fadd X, -0 ==> X
@@ -4390,7 +4550,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// fsub X, +0 ==> X
@@ -4430,23 +4590,27 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
-/// Given the operands for an FMul, see if we can fold the result
-static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
- return C;
-
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// fmul X, 1.0 ==> X
if (match(Op1, m_FPOne()))
return Op0;
+ // fmul 1.0, X ==> X
+ if (match(Op0, m_FPOne()))
+ return Op1;
+
// fmul nnan nsz X, 0 ==> 0
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP()))
return ConstantFP::getNullValue(Op0->getType());
+ // fmul nnan nsz 0, X ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
+ return ConstantFP::getNullValue(Op1->getType());
+
// sqrt(X) * sqrt(X) --> X, if we can:
// 1. Remove the intermediate rounding (reassociate).
// 2. Ignore non-zero negative numbers because sqrt would produce NAN.
@@ -4459,6 +4623,16 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return nullptr;
}
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
+ return C;
+
+ // Now apply simplifications that do not require rounding.
+ return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse);
+}
+
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q) {
return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
@@ -4475,12 +4649,17 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
}
+Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned) {
if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// X / 1.0 -> X
@@ -4525,7 +4704,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
return C;
- if (Constant *C = simplifyFPBinop(Op0, Op1))
+ if (Constant *C = simplifyFPOp({Op0, Op1}))
return C;
// Unlike fdiv, the result of frem always matches the sign of the dividend.
@@ -4564,8 +4743,7 @@ static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
/// Given the operand for a UnaryOperator, see if we can fold the result.
/// If not, this returns null.
-/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
+/// Try to use FastMathFlags when folding the result.
static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
const FastMathFlags &FMF,
const SimplifyQuery &Q, unsigned MaxRecurse) {
@@ -4581,8 +4759,8 @@ Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
}
-Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
- const SimplifyQuery &Q) {
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
}
@@ -4634,11 +4812,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
-/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
-static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const SimplifyQuery &Q,
- unsigned MaxRecurse) {
+/// Try to use FastMathFlags when folding the result.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FAdd:
return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
@@ -4658,9 +4835,9 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
}
-Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- FastMathFlags FMF, const SimplifyQuery &Q) {
- return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
}
/// Given operands for a CmpInst, see if we can fold the result.
@@ -5009,6 +5186,15 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
return nullptr;
}
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd: {
+ Value *Op0 = Call->getArgOperand(0);
+ Value *Op1 = Call->getArgOperand(1);
+ Value *Op2 = Call->getArgOperand(2);
+ if (Value *V = simplifyFPOp({ Op0, Op1, Op2 }))
+ return V;
+ return nullptr;
+ }
default:
return nullptr;
}
@@ -5221,14 +5407,16 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
/// instructions to process and attempt to simplify it using
-/// InstructionSimplify.
+/// InstructionSimplify. Recursively visited users which could not be
+/// simplified themselves are added to the optional UnsimplifiedUsers set for
+/// further processing by the caller.
///
/// This routine returns 'true' only when *it* simplifies something. The passed
/// in simplified value does not count toward this.
-static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionCache *AC) {
+static bool replaceAndRecursivelySimplifyImpl(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
const DataLayout &DL = I->getModule()->getDataLayout();
@@ -5258,8 +5446,11 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// See if this instruction simplifies.
SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
- if (!SimpleV)
+ if (!SimpleV) {
+ if (UnsimplifiedUsers)
+ UnsimplifiedUsers->insert(I);
continue;
+ }
Simplified = true;
@@ -5285,16 +5476,17 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC, nullptr);
}
-bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT,
- AssumptionCache *AC) {
+bool llvm::replaceAndRecursivelySimplify(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC,
+ UnsimplifiedUsers);
}
namespace llvm {
@@ -5302,7 +5494,7 @@ const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) {
auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr;
auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>();
auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
return {F.getParent()->getDataLayout(), TLI, DT, AC};
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
index f2592c26b373..e727de468a0d 100644
--- a/lib/Analysis/LazyBranchProbabilityInfo.cpp
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -55,8 +55,9 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
+ TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ LBPI = std::make_unique<LazyBranchProbabilityInfo>(&F, &LI, &TLI);
return false;
}
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index 797fcf516429..ef31c1e0ba8c 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -150,7 +150,8 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName());
}
-LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
+LazyCallGraph::LazyCallGraph(
+ Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
LLVM_DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
<< "\n");
for (Function &F : M) {
@@ -159,7 +160,7 @@ LazyCallGraph::LazyCallGraph(Module &M, TargetLibraryInfo &TLI) {
// If this function is a known lib function to LLVM then we want to
// synthesize reference edges to it to model the fact that LLVM can turn
// arbitrary code into a library function call.
- if (isKnownLibFunction(F, TLI))
+ if (isKnownLibFunction(F, GetTLI(F)))
LibFunctions.insert(&F);
if (F.hasLocalLinkage())
@@ -631,7 +632,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
- if (empty(MergeRange)) {
+ if (MergeRange.empty()) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
return false; // No new cycle.
@@ -1751,16 +1752,14 @@ static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) {
}
static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &C) {
- ptrdiff_t Size = size(C);
- OS << " SCC with " << Size << " functions:\n";
+ OS << " SCC with " << C.size() << " functions:\n";
for (LazyCallGraph::Node &N : C)
OS << " " << N.getFunction().getName() << "\n";
}
static void printRefSCC(raw_ostream &OS, LazyCallGraph::RefSCC &C) {
- ptrdiff_t Size = size(C);
- OS << " RefSCC with " << Size << " call SCCs:\n";
+ OS << " RefSCC with " << C.size() << " call SCCs:\n";
for (LazyCallGraph::SCC &InnerC : C)
printSCC(OS, InnerC);
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 542ff709d475..96722f32e355 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -188,7 +188,7 @@ namespace {
else {
auto It = ValueCache.find_as(Val);
if (It == ValueCache.end()) {
- ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this);
+ ValueCache[Val] = std::make_unique<ValueCacheEntryTy>(Val, this);
It = ValueCache.find_as(Val);
assert(It != ValueCache.end() && "Val was just added to the map!");
}
@@ -434,6 +434,8 @@ namespace {
ValueLatticeElement &BBLV, WithOverflowInst *WO, BasicBlock *BB);
bool solveBlockValueIntrinsic(ValueLatticeElement &BBLV, IntrinsicInst *II,
BasicBlock *BB);
+ bool solveBlockValueExtractValue(ValueLatticeElement &BBLV,
+ ExtractValueInst *EVI, BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
ValueLatticeElement &BBLV,
Instruction *BBI);
@@ -648,9 +650,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
return solveBlockValueBinaryOp(Res, BO, BB);
if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
- if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
- if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
- return solveBlockValueOverflowIntrinsic(Res, WO, BB);
+ return solveBlockValueExtractValue(Res, EVI, BB);
if (auto *II = dyn_cast<IntrinsicInst>(BBI))
return solveBlockValueIntrinsic(Res, II, BB);
@@ -1135,6 +1135,33 @@ bool LazyValueInfoImpl::solveBlockValueIntrinsic(
}
}
+bool LazyValueInfoImpl::solveBlockValueExtractValue(
+ ValueLatticeElement &BBLV, ExtractValueInst *EVI, BasicBlock *BB) {
+ if (auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand()))
+ if (EVI->getNumIndices() == 1 && *EVI->idx_begin() == 0)
+ return solveBlockValueOverflowIntrinsic(BBLV, WO, BB);
+
+  // Handle extractvalue of insertvalue, which allows further simplification
+  // after with.overflow intrinsics have been replaced by insertvalue chains.
+ if (Value *V = SimplifyExtractValueInst(
+ EVI->getAggregateOperand(), EVI->getIndices(),
+ EVI->getModule()->getDataLayout())) {
+ if (!hasBlockValue(V, BB)) {
+ if (pushBlockValue({ BB, V }))
+ return false;
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+ }
+ BBLV = getBlockValue(V, BB);
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined (unknown extractvalue).\n");
+ BBLV = ValueLatticeElement::getOverdefined();
+ return true;
+}
+
static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
bool isTrueDest) {
Value *LHS = ICI->getOperand(0);
@@ -1575,7 +1602,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
Info.DT = DTWP ? &DTWP->getDomTree() : nullptr;
- Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
if (Info.PImpl)
getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear();
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp
index 52212e1c42aa..7de9d2cbfddb 100644
--- a/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -93,8 +93,9 @@ namespace {
class DivergencePropagator {
public:
DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV,
+ DenseSet<const Use *> &DU)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
void populateWithSourcesOfDivergence();
void propagate();
@@ -118,11 +119,14 @@ private:
PostDominatorTree &PDT;
std::vector<Value *> Worklist; // Stack for DFS.
DenseSet<const Value *> &DV; // Stores all divergent values.
+ DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
+ // values.
};
void DivergencePropagator::populateWithSourcesOfDivergence() {
Worklist.clear();
DV.clear();
+ DU.clear();
for (auto &I : instructions(F)) {
if (TTI.isSourceOfDivergence(&I)) {
Worklist.push_back(&I);
@@ -197,8 +201,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
// dominators of TI until it is outside the influence region.
BasicBlock *InfluencedBB = ThisBB;
while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB)
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ for (auto &I : *InfluencedBB) {
+ if (!DV.count(&I))
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ }
DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
if (IDomNode == nullptr)
break;
@@ -208,9 +214,10 @@ void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
void DivergencePropagator::findUsersOutsideInfluenceRegion(
Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (User *U : I.users()) {
- Instruction *UserInst = cast<Instruction>(U);
+ for (Use &Use : I.uses()) {
+ Instruction *UserInst = cast<Instruction>(Use.getUser());
if (!InfluenceRegion.count(UserInst->getParent())) {
+ DU.insert(&Use);
if (DV.insert(UserInst).second)
Worklist.push_back(UserInst);
}
@@ -250,9 +257,8 @@ void DivergencePropagator::computeInfluenceRegion(
void DivergencePropagator::exploreDataDependency(Value *V) {
// Follow def-use chains of V.
for (User *U : V->users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+ if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
+ Worklist.push_back(U);
}
}
@@ -320,6 +326,7 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
return false;
DivergentValues.clear();
+ DivergentUses.clear();
gpuDA = nullptr;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -328,11 +335,11 @@ bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
if (shouldUseGPUDivergenceAnalysis(F)) {
// run the new GPU divergence analysis
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+ gpuDA = std::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
} else {
// run LLVM's existing DivergenceAnalysis
- DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
DP.populateWithSourcesOfDivergence();
DP.propagate();
}
@@ -351,6 +358,13 @@ bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
return DivergentValues.count(V);
}
+bool LegacyDivergenceAnalysis::isDivergentUse(const Use *U) const {
+ if (gpuDA) {
+ return gpuDA->isDivergentUse(*U);
+ }
+ return DivergentValues.count(U->get()) || DivergentUses.count(U);
+}
+
void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
return;
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index d28b8a189d4b..db18716c64cf 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -205,7 +205,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 31da4e9ec783..641e92eac781 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -12,6 +12,9 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
@@ -24,34 +27,30 @@
using namespace llvm;
-static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align,
- const DataLayout &DL) {
- APInt BaseAlign(Offset.getBitWidth(), Base->getPointerAlignment(DL));
-
- if (!BaseAlign) {
- Type *Ty = Base->getType()->getPointerElementType();
- if (!Ty->isSized())
- return false;
- BaseAlign = DL.getABITypeAlignment(Ty);
- }
-
- APInt Alignment(Offset.getBitWidth(), Align);
-
- assert(Alignment.isPowerOf2() && "must be a power of 2!");
- return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+static MaybeAlign getBaseAlign(const Value *Base, const DataLayout &DL) {
+ if (const MaybeAlign PA = Base->getPointerAlignment(DL))
+ return *PA;
+ Type *const Ty = Base->getType()->getPointerElementType();
+ if (!Ty->isSized())
+ return None;
+ return Align(DL.getABITypeAlignment(Ty));
}
-static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
- Type *Ty = Base->getType();
- assert(Ty->isSized() && "must be sized");
- APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
- return isAligned(Base, Offset, Align, DL);
+static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment,
+ const DataLayout &DL) {
+ if (MaybeAlign BA = getBaseAlign(Base, DL)) {
+ const APInt APBaseAlign(Offset.getBitWidth(), BA->value());
+ const APInt APAlign(Offset.getBitWidth(), Alignment.value());
+ assert(APAlign.isPowerOf2() && "must be a power of 2!");
+ return APBaseAlign.uge(APAlign) && !(Offset & (APAlign - 1));
+ }
+ return false;
}
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceableAndAlignedPointer(
- const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL,
+ const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL,
const Instruction *CtxI, const DominatorTree *DT,
SmallPtrSetImpl<const Value *> &Visited) {
// Already visited? Bail out, we've likely hit unreachable code.
@@ -63,17 +62,22 @@ static bool isDereferenceableAndAlignedPointer(
// bitcast instructions are no-ops as far as dereferenceability is concerned.
if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
- return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, Size,
- DL, CtxI, DT, Visited);
+ return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment,
+ Size, DL, CtxI, DT, Visited);
bool CheckForNonNull = false;
APInt KnownDerefBytes(Size.getBitWidth(),
V->getPointerDereferenceableBytes(DL, CheckForNonNull));
- if (KnownDerefBytes.getBoolValue()) {
- if (KnownDerefBytes.uge(Size))
- if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT))
- return isAligned(V, Align, DL);
- }
+ if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size))
+ if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) {
+ // As we recursed through GEPs to get here, we've incrementally checked
+ // that each step advanced by a multiple of the alignment. If our base is
+ // properly aligned, then the original offset accessed must also be.
+ Type *Ty = V->getType();
+ assert(Ty->isSized() && "must be sized");
+ APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+ return isAligned(V, Offset, Alignment, DL);
+ }
// For GEPs, determine if the indexing lands within the allocated object.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -81,7 +85,8 @@ static bool isDereferenceableAndAlignedPointer(
APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
- !Offset.urem(APInt(Offset.getBitWidth(), Align)).isMinValue())
+ !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value()))
+ .isMinValue())
return false;
// If the base pointer is dereferenceable for Offset+Size bytes, then the
@@ -93,67 +98,69 @@ static bool isDereferenceableAndAlignedPointer(
// Offset and Size may have different bit widths if we have visited an
// addrspacecast, so we can't do arithmetic directly on the APInt values.
return isDereferenceableAndAlignedPointer(
- Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()),
- DL, CtxI, DT, Visited);
+ Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL,
+ CtxI, DT, Visited);
}
// For gc.relocate, look through relocations
if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
return isDereferenceableAndAlignedPointer(
- RelocateInst->getDerivedPtr(), Align, Size, DL, CtxI, DT, Visited);
+ RelocateInst->getDerivedPtr(), Alignment, Size, DL, CtxI, DT, Visited);
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
- return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size,
- DL, CtxI, DT, Visited);
+ return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment,
+ Size, DL, CtxI, DT, Visited);
if (const auto *Call = dyn_cast<CallBase>(V))
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call))
- return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT,
- Visited);
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
+ return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI,
+ DT, Visited);
// If we don't know, assume the worst.
return false;
}
-bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
const APInt &Size,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
+ // Note: At the moment, Size can be zero. This ends up being interpreted as
+ // a query of whether [Base, V] is dereferenceable and V is aligned (since
+ // that's what the implementation happened to do). It's unclear if this is
+ // the desired semantic, but at least SelectionDAG does exercise this case.
+
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT,
+ return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT,
Visited);
}
bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- unsigned Align,
+ MaybeAlign MA,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
+ if (!Ty->isSized())
+ return false;
+
// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
// Require ABI alignment for loads without alignment specification
- if (Align == 0)
- Align = DL.getABITypeAlignment(Ty);
-
- if (!Ty->isSized())
- return false;
-
- SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceableAndAlignedPointer(
- V, Align,
- APInt(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty)),
- DL, CtxI, DT, Visited);
+ const Align Alignment = DL.getValueOrABITypeAlignment(MA, Ty);
+ APInt AccessSize(DL.getIndexTypeSizeInBits(V->getType()),
+ DL.getTypeStoreSize(Ty));
+ return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI,
+ DT);
}
bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
const DataLayout &DL,
const Instruction *CtxI,
const DominatorTree *DT) {
- return isDereferenceableAndAlignedPointer(V, Ty, 1, DL, CtxI, DT);
+ return isDereferenceableAndAlignedPointer(V, Ty, Align::None(), DL, CtxI, DT);
}
/// Test if A and B will obviously have the same value.
@@ -187,6 +194,60 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
+bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ auto &DL = LI->getModule()->getDataLayout();
+ Value *Ptr = LI->getPointerOperand();
+
+ APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+ DL.getTypeStoreSize(LI->getType()));
+ const Align Alignment = DL.getValueOrABITypeAlignment(
+ MaybeAlign(LI->getAlignment()), LI->getType());
+
+ Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
+
+ // If given a uniform (i.e. non-varying) address, see if we can prove the
+  // access is safe within the loop without needing predication.
+ if (L->isLoopInvariant(Ptr))
+ return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
+ HeaderFirstNonPHI, &DT);
+
+ // Otherwise, check to see if we have a repeating access pattern where we can
+ // prove that all accesses are well aligned and dereferenceable.
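+  // For example, a unit-stride load in a loop with a constant trip count TC
+  // touches exactly the bytes [Base, Base + TC * EltSize), so the whole range
+  // can be checked for dereferenceability up front.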
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+ if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
+ return false;
+ auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
+ if (!Step)
+ return false;
+ // TODO: generalize to access patterns which have gaps
+ if (Step->getAPInt() != EltSize)
+ return false;
+
+ // TODO: If the symbolic trip count has a small bound (max count), we might
+ // be able to prove safety.
+ auto TC = SE.getSmallConstantTripCount(L);
+ if (!TC)
+ return false;
+
+ const APInt AccessSize = TC * EltSize;
+
+ auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart());
+ if (!StartS)
+ return false;
+ assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition");
+ Value *Base = StartS->getValue();
+
+  // For the moment, restrict ourselves to the case where the element size is
+  // a multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case is found which warrants it.
+ if (EltSize.urem(Alignment.value()) != 0)
+ return false;
+ return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
+ HeaderFirstNonPHI, &DT);
+}
+
/// Check if executing a load of this pointer value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
@@ -198,64 +259,25 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
///
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
-bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+bool llvm::isSafeToLoadUnconditionally(Value *V, MaybeAlign MA, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom,
const DominatorTree *DT) {
// Zero alignment means that the load has the ABI alignment for the target
- if (Align == 0)
- Align = DL.getABITypeAlignment(V->getType()->getPointerElementType());
- assert(isPowerOf2_32(Align));
+ const Align Alignment =
+ DL.getValueOrABITypeAlignment(MA, V->getType()->getPointerElementType());
// If DT is not specified we can't make context-sensitive query
const Instruction* CtxI = DT ? ScanFrom : nullptr;
- if (isDereferenceableAndAlignedPointer(V, Align, Size, DL, CtxI, DT))
+ if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT))
return true;
- int64_t ByteOffset = 0;
- Value *Base = V;
- Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
-
- if (ByteOffset < 0) // out of bounds
+ if (!ScanFrom)
return false;
- Type *BaseType = nullptr;
- unsigned BaseAlign = 0;
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
- // An alloca is safe to load from as load as it is suitably aligned.
- BaseType = AI->getAllocatedType();
- BaseAlign = AI->getAlignment();
- } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
- // Global variables are not necessarily safe to load from if they are
- // interposed arbitrarily. Their size may change or they may be weak and
- // require a test to determine if they were in fact provided.
- if (!GV->isInterposable()) {
- BaseType = GV->getType()->getElementType();
- BaseAlign = GV->getAlignment();
- }
- }
-
- PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
-
- // If we found a base allocated type from either an alloca or global variable,
- // try to see if we are definitively within the allocated region. We need to
- // know the size of the base type and the loaded type to do anything in this
- // case.
- if (BaseType && BaseType->isSized()) {
- if (BaseAlign == 0)
- BaseAlign = DL.getPrefTypeAlignment(BaseType);
-
- if (Align <= BaseAlign) {
- // Check if the load is within the bounds of the underlying object.
- if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
- ((ByteOffset % Align) == 0))
- return true;
- }
- }
-
- if (!ScanFrom)
+ if (Size.getBitWidth() > 64)
return false;
+ const uint64_t LoadSize = Size.getZExtValue();
// Otherwise, be a little bit aggressive by scanning the local block where we
// want to check to see if the pointer is already being loaded or stored
@@ -279,7 +301,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
return false;
Value *AccessedPtr;
- unsigned AccessedAlign;
+ MaybeAlign MaybeAccessedAlign;
if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
// Ignore volatile loads. The execution of a volatile load cannot
// be used to prove an address is backed by regular memory; it can,
@@ -287,24 +309,26 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
if (LI->isVolatile())
continue;
AccessedPtr = LI->getPointerOperand();
- AccessedAlign = LI->getAlignment();
+ MaybeAccessedAlign = MaybeAlign(LI->getAlignment());
} else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
// Ignore volatile stores (see comment for loads).
if (SI->isVolatile())
continue;
AccessedPtr = SI->getPointerOperand();
- AccessedAlign = SI->getAlignment();
+ MaybeAccessedAlign = MaybeAlign(SI->getAlignment());
} else
continue;
Type *AccessedTy = AccessedPtr->getType()->getPointerElementType();
- if (AccessedAlign == 0)
- AccessedAlign = DL.getABITypeAlignment(AccessedTy);
- if (AccessedAlign < Align)
+
+ const Align AccessedAlign =
+ DL.getValueOrABITypeAlignment(MaybeAccessedAlign, AccessedTy);
+ if (AccessedAlign < Alignment)
continue;
// Handle trivial cases.
- if (AccessedPtr == V)
+ if (AccessedPtr == V &&
+ LoadSize <= DL.getTypeStoreSize(AccessedTy))
return true;
if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
@@ -314,12 +338,12 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
return false;
}
-bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment,
const DataLayout &DL,
Instruction *ScanFrom,
const DominatorTree *DT) {
APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
- return isSafeToLoadUnconditionally(V, Align, Size, DL, ScanFrom, DT);
+ return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT);
}
/// DefMaxInstsToScan - the default number of maximum instructions
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 36bd9a8b7ea7..3d8f77675f3a 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1189,18 +1189,31 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+ // Retrieve the address space again as pointer stripping now tracks through
+ // `addrspacecast`.
+ ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
+ ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
+ // Check that the address spaces match and that the pointers are valid.
+ if (ASA != ASB)
+ return false;
+
+ IdxWidth = DL.getIndexSizeInBits(ASA);
+ OffsetA = OffsetA.sextOrTrunc(IdxWidth);
+ OffsetB = OffsetB.sextOrTrunc(IdxWidth);
+
+ APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
+
// OffsetDelta = OffsetB - OffsetA;
const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
- const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
- const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
+ const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
+
// Check if they are based on the same pointer. That makes the offsets
// sufficient.
if (PtrA == PtrB)
@@ -1641,13 +1654,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
// Check every access pair.
while (AI != AE) {
Visited.insert(*AI);
- EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ bool AIIsWrite = AI->getInt();
+      // Check loads only against the next equivalence class, but stores also
+      // against other stores in the same equivalence class - to the same
+      // address.
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI =
+ (AIIsWrite ? AI : std::next(AI));
while (OI != AE) {
// Check every accessing instruction pair in program order.
for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
- for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
- I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ // Scan all accesses of another equivalence class, but only the next
+          // accesses of the same equivalence class.
+ for (std::vector<unsigned>::iterator
+ I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
+ I2E = (OI == AI ? I1E : Accesses[*OI].end());
+ I2 != I2E; ++I2) {
auto A = std::make_pair(&*AI, *I1);
auto B = std::make_pair(&*OI, *I2);
@@ -2078,7 +2099,7 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
DL = I->getDebugLoc();
}
- Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
+ Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
CodeRegion);
return *Report;
}
@@ -2323,9 +2344,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI)
- : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
- PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
- DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
+ : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
+ PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
+ DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
HasConvergentOp(false),
HasDependenceInvolvingLoopInvariantAddress(false) {
@@ -2380,7 +2401,7 @@ const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
auto &LAI = LoopAccessInfoMap[L];
if (!LAI)
- LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
+ LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
return *LAI.get();
}
@@ -2399,7 +2420,7 @@ void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index a10a87ce113b..02d40fb8d72a 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -46,7 +46,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
// invalidation logic below to act on that.
auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>();
bool invalidateMemorySSAAnalysis = false;
- if (EnableMSSALoopDependency)
+ if (MSSAUsed)
invalidateMemorySSAAnalysis = Inv.invalidate<MemorySSAAnalysis>(F, PA);
if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AAManager>(F, PA) ||
diff --git a/lib/Analysis/LoopCacheAnalysis.cpp b/lib/Analysis/LoopCacheAnalysis.cpp
new file mode 100644
index 000000000000..10d2fe07884a
--- /dev/null
+++ b/lib/Analysis/LoopCacheAnalysis.cpp
@@ -0,0 +1,625 @@
+//===- LoopCacheAnalysis.cpp - Loop Cache Analysis ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the implementation for the loop cache analysis.
+/// The implementation is largely based on the following paper:
+///
+/// Compiler Optimizations for Improving Data Locality
+/// By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
+/// http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
+///
+/// The general approach taken to estimate the number of cache lines used by the
+/// memory references in an inner loop is:
+/// 1. Partition memory references that exhibit temporal or spatial reuse
+///    into reference groups.
+/// 2. For each loop L in a loop nest LN:
+///    a. Compute the cost of each reference group
+///    b. Compute the loop cost by summing up the reference group costs
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopCacheAnalysis.h"
+#include "llvm/ADT/BreadthFirstIterator.h"
+#include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-cache-cost"
+
+static cl::opt<unsigned> DefaultTripCount(
+ "default-trip-count", cl::init(100), cl::Hidden,
+ cl::desc("Use this to specify the default trip count of a loop"));
+
+// In this analysis two array references are considered to exhibit temporal
+// reuse if they access either the same memory location, or a memory location
+// with distance smaller than a configurable threshold.
+static cl::opt<unsigned> TemporalReuseThreshold(
+ "temporal-reuse-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Use this to specify the max. distance between array elements "
+ "accessed in a loop so that the elements are classified to have "
+ "temporal reuse"));
+
+/// Retrieve the innermost loop in the given loop nest \p Loops. It returns
+/// nullptr if any loop in the supplied loop vector has more than one sibling.
+/// The loop vector is expected to contain loops collected in breadth-first
+/// order.
+static Loop *getInnerMostLoop(const LoopVectorTy &Loops) {
+  assert(!Loops.empty() && "Expecting a non-empty loop vector");
+
+ Loop *LastLoop = Loops.back();
+ Loop *ParentLoop = LastLoop->getParentLoop();
+
+ if (ParentLoop == nullptr) {
+ assert(Loops.size() == 1 && "Expecting a single loop");
+ return LastLoop;
+ }
+
+ return (std::is_sorted(Loops.begin(), Loops.end(),
+ [](const Loop *L1, const Loop *L2) {
+ return L1->getLoopDepth() < L2->getLoopDepth();
+ }))
+ ? LastLoop
+ : nullptr;
+}
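+
+// For illustration (not part of this patch): for a perfectly nested loop
+//   for (i) for (j) for (k) { ... }
+// the breadth-first loop vector is {i, j, k} with loop depths {1, 2, 3},
+// which is sorted by depth, so the 'k' loop is returned as the innermost
+// loop of the nest.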
+
+static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
+ const Loop &L, ScalarEvolution &SE) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ assert(AR->getLoop() && "AR should have a loop");
+
+ // Check that start and increment are not add recurrences.
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
+ return false;
+
+ // Check that start and increment are both invariant in the loop.
+ if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
+ return false;
+
+ return AR->getStepRecurrence(SE) == &ElemSize;
+}
+
+/// Compute the trip count for the given loop \p L. Return the SCEV expression
+/// for the trip count or nullptr if it cannot be computed.
+static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
+ const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ !isa<SCEVConstant>(BackedgeTakenCount))
+ return nullptr;
+
+ return SE.getAddExpr(BackedgeTakenCount,
+ SE.getOne(BackedgeTakenCount->getType()));
+}
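+
+// For example (illustrative, not part of this patch): for
+//   for (int i = 0; i < 100; ++i) { ... }
+// the backedge-taken count is the constant 99 and the returned trip count
+// SCEV is (99 + 1) == 100; for a symbolic bound 'n' the function returns
+// nullptr because the backedge-taken count is not a SCEVConstant.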
+
+//===----------------------------------------------------------------------===//
+// IndexedReference implementation
+//
+raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) {
+ if (!R.IsValid) {
+ OS << R.StoreOrLoadInst;
+ OS << ", IsValid=false.";
+ return OS;
+ }
+
+ OS << *R.BasePointer;
+ for (const SCEV *Subscript : R.Subscripts)
+ OS << "[" << *Subscript << "]";
+
+ OS << ", Sizes: ";
+ for (const SCEV *Size : R.Sizes)
+ OS << "[" << *Size << "]";
+
+ return OS;
+}
+
+IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
+ const LoopInfo &LI, ScalarEvolution &SE)
+ : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
+ assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
+ "Expecting a load or store instruction");
+
+ IsValid = delinearize(LI);
+ if (IsValid)
+    LLVM_DEBUG(dbgs().indent(2) << "Successfully delinearized: " << *this
+ << "\n");
+}
+
+Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other,
+ unsigned CLS,
+ AliasAnalysis &AA) const {
+ assert(IsValid && "Expecting a valid reference");
+
+ if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No spacial reuse: different base pointers\n");
+ return false;
+ }
+
+ unsigned NumSubscripts = getNumSubscripts();
+ if (NumSubscripts != Other.getNumSubscripts()) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No spacial reuse: different number of subscripts\n");
+ return false;
+ }
+
+  // All subscripts must be equal, except the last one (the subscript of the
+  // innermost dimension).
+ for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
+ if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
+ LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
+ << "\n\t" << *getSubscript(SubNum) << "\n\t"
+ << *Other.getSubscript(SubNum) << "\n");
+ return false;
+ }
+ }
+
+  // The difference between the last subscripts must be less than the cache
+  // line size.
+ const SCEV *LastSubscript = getLastSubscript();
+ const SCEV *OtherLastSubscript = Other.getLastSubscript();
+ const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
+ SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
+
+ if (Diff == nullptr) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No spacial reuse, difference between subscript:\n\t"
+ << *LastSubscript << "\n\t" << OtherLastSubscript
+ << "\nis not constant.\n");
+ return None;
+ }
+
+ bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
+
+ LLVM_DEBUG({
+ if (InSameCacheLine)
+ dbgs().indent(2) << "Found spacial reuse.\n";
+ else
+ dbgs().indent(2) << "No spacial reuse.\n";
+ });
+
+ return InSameCacheLine;
+}
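+
+// Illustrative example (not part of this patch): the references A[i][j]
+// and A[i][j + 1] share every subscript except the last, and the last
+// subscripts differ by the constant 1, which is below the cache line size
+// threshold, so the pair is classified as exhibiting spatial reuse.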
+
+Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other,
+ unsigned MaxDistance,
+ const Loop &L,
+ DependenceInfo &DI,
+ AliasAnalysis &AA) const {
+ assert(IsValid && "Expecting a valid reference");
+
+ if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No temporal reuse: different base pointer\n");
+ return false;
+ }
+
+ std::unique_ptr<Dependence> D =
+ DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
+
+ if (D == nullptr) {
+ LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
+ return false;
+ }
+
+ if (D->isLoopIndependent()) {
+ LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
+ return true;
+ }
+
+ // Check the dependence distance at every loop level. There is temporal reuse
+ // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
+ // it is zero at every other loop level.
+ int LoopDepth = L.getLoopDepth();
+ int Levels = D->getLevels();
+ for (int Level = 1; Level <= Levels; ++Level) {
+ const SCEV *Distance = D->getDistance(Level);
+ const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
+
+ if (SCEVConst == nullptr) {
+ LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
+ return None;
+ }
+
+ const ConstantInt &CI = *SCEVConst->getValue();
+ if (Level != LoopDepth && !CI.isZero()) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No temporal reuse: distance is not zero at depth=" << Level
+ << "\n");
+ return false;
+ } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
+ LLVM_DEBUG(
+ dbgs().indent(2)
+ << "No temporal reuse: distance is greater than MaxDistance at depth="
+ << Level << "\n");
+ return false;
+ }
+ }
+
+ LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
+ return true;
+}
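+
+// Illustrative example (not part of this patch): for references A[i][j]
+// and A[i - 1][j] in a nest over i (depth 1) and j (depth 2), the
+// dependence distance vector is (1, 0); querying reuse w.r.t. the 'i' loop
+// finds a distance of 1 <= MaxDistance at depth 1 and zero elsewhere, so
+// the pair is reported as having temporal reuse.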
+
+CacheCostTy IndexedReference::computeRefCost(const Loop &L,
+ unsigned CLS) const {
+ assert(IsValid && "Expecting a valid reference");
+ LLVM_DEBUG({
+ dbgs().indent(2) << "Computing cache cost for:\n";
+ dbgs().indent(4) << *this << "\n";
+ });
+
+ // If the indexed reference is loop invariant the cost is one.
+ if (isLoopInvariant(L)) {
+ LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
+ return 1;
+ }
+
+ const SCEV *TripCount = computeTripCount(L, SE);
+ if (!TripCount) {
+ LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
+ << " could not be computed, using DefaultTripCount\n");
+ const SCEV *ElemSize = Sizes.back();
+ TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
+ }
+ LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
+
+ // If the indexed reference is 'consecutive' the cost is
+ // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
+ const SCEV *RefCost = TripCount;
+
+ if (isConsecutive(L, CLS)) {
+ const SCEV *Coeff = getLastCoefficient();
+ const SCEV *ElemSize = Sizes.back();
+ const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
+ const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
+ RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
+ LLVM_DEBUG(dbgs().indent(4)
+ << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
+ << *RefCost << "\n");
+ } else
+ LLVM_DEBUG(dbgs().indent(4)
+ << "Access is not consecutive: RefCost=TripCount=" << *RefCost
+ << "\n");
+
+ // Attempt to fold RefCost into a constant.
+ if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
+ return ConstantCost->getValue()->getSExtValue();
+
+ LLVM_DEBUG(dbgs().indent(4)
+ << "RefCost is not a constant! Setting to RefCost=InvalidCost "
+ "(invalid value).\n");
+
+ return CacheCost::InvalidCost;
+}
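+
+// Worked example (illustrative, not part of this patch): for a consecutive
+// reference with TripCount == 100, a last-subscript coefficient of 1, an
+// element size of 4 bytes and a 64-byte cache line, the estimated cost is
+// (100 * 4) / 64 == 6 cache lines; a non-consecutive reference over the
+// same loop would instead cost TripCount == 100.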
+
+bool IndexedReference::delinearize(const LoopInfo &LI) {
+ assert(Subscripts.empty() && "Subscripts should be empty");
+ assert(Sizes.empty() && "Sizes should be empty");
+ assert(!IsValid && "Should be called once from the constructor");
+ LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
+
+ const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
+ const BasicBlock *BB = StoreOrLoadInst.getParent();
+
+ for (Loop *L = LI.getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
+ const SCEV *AccessFn =
+ SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
+
+ BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
+ if (BasePointer == nullptr) {
+ LLVM_DEBUG(
+ dbgs().indent(2)
+ << "ERROR: failed to delinearize, can't identify base pointer\n");
+ return false;
+ }
+
+ AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
+
+ LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
+ << "', AccessFn: " << *AccessFn << "\n");
+
+ SE.delinearize(AccessFn, Subscripts, Sizes,
+ SE.getElementSize(&StoreOrLoadInst));
+
+ if (Subscripts.empty() || Sizes.empty() ||
+ Subscripts.size() != Sizes.size()) {
+      // Attempt to determine whether we have a single dimensional array
+      // access before giving up.
+ if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "ERROR: failed to delinearize reference\n");
+ Subscripts.clear();
+ Sizes.clear();
+ break;
+ }
+
+ const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
+ Subscripts.push_back(Div);
+ Sizes.push_back(ElemSize);
+ }
+
+ return all_of(Subscripts, [&](const SCEV *Subscript) {
+ return isSimpleAddRecurrence(*Subscript, *L);
+ });
+ }
+
+ return false;
+}
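+
+// Illustrative example (not part of this patch): for a C-style access
+//   A[i][j]   // A declared as A[n][m], 4-byte elements
+// delinearization recovers roughly Subscripts == {i, j} and
+// Sizes == {m, 4}; when it fails but the access is a one-dimensional
+// affine recurrence, the single subscript is reconstructed by dividing
+// the access function by the element size.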
+
+bool IndexedReference::isLoopInvariant(const Loop &L) const {
+ Value *Addr = getPointerOperand(&StoreOrLoadInst);
+ assert(Addr != nullptr && "Expecting either a load or a store instruction");
+ assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
+
+ if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
+ return true;
+
+ // The indexed reference is loop invariant if none of the coefficients use
+ // the loop induction variable.
+ bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
+ return isCoeffForLoopZeroOrInvariant(*Subscript, L);
+ });
+
+ return allCoeffForLoopAreZero;
+}
+
+bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
+ // The indexed reference is 'consecutive' if the only coefficient that uses
+ // the loop induction variable is the last one...
+ const SCEV *LastSubscript = Subscripts.back();
+ for (const SCEV *Subscript : Subscripts) {
+ if (Subscript == LastSubscript)
+ continue;
+ if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
+ return false;
+ }
+
+ // ...and the access stride is less than the cache line size.
+ const SCEV *Coeff = getLastCoefficient();
+ const SCEV *ElemSize = Sizes.back();
+ const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
+ const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
+
+ return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
+}
+
+const SCEV *IndexedReference::getLastCoefficient() const {
+ const SCEV *LastSubscript = getLastSubscript();
+ assert(isa<SCEVAddRecExpr>(LastSubscript) &&
+ "Expecting a SCEV add recurrence expression");
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
+ return AR->getStepRecurrence(SE);
+}
+
+bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
+ const Loop &L) const {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
+ return (AR != nullptr) ? AR->getLoop() != &L
+ : SE.isLoopInvariant(&Subscript, &L);
+}
+
+bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
+ const Loop &L) const {
+ if (!isa<SCEVAddRecExpr>(Subscript))
+ return false;
+
+ const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
+ assert(AR->getLoop() && "AR should have a loop");
+
+ if (!AR->isAffine())
+ return false;
+
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(SE);
+
+ if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
+ return false;
+
+ return true;
+}
+
+bool IndexedReference::isAliased(const IndexedReference &Other,
+ AliasAnalysis &AA) const {
+ const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
+ const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
+ return AA.isMustAlias(Loc1, Loc2);
+}
+
+//===----------------------------------------------------------------------===//
+// CacheCost implementation
+//
+raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
+ for (const auto &LC : CC.LoopCosts) {
+ const Loop *L = LC.first;
+ OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
+ }
+ return OS;
+}
+
+CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
+ ScalarEvolution &SE, TargetTransformInfo &TTI,
+ AliasAnalysis &AA, DependenceInfo &DI,
+ Optional<unsigned> TRT)
+ : Loops(Loops), TripCounts(), LoopCosts(),
+ TRT(TRT == None ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
+ LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
+ assert(!Loops.empty() && "Expecting a non-empty loop vector.");
+
+ for (const Loop *L : Loops) {
+ unsigned TripCount = SE.getSmallConstantTripCount(L);
+ TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
+ TripCounts.push_back({L, TripCount});
+ }
+
+ calculateCacheFootprint();
+}
+
+std::unique_ptr<CacheCost>
+CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
+ DependenceInfo &DI, Optional<unsigned> TRT) {
+ if (Root.getParentLoop()) {
+ LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
+ return nullptr;
+ }
+
+ LoopVectorTy Loops;
+ for (Loop *L : breadth_first(&Root))
+ Loops.push_back(L);
+
+ if (!getInnerMostLoop(Loops)) {
+ LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
+ "than one innermost loop\n");
+ return nullptr;
+ }
+
+  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI,
+                                     TRT);
+}
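+
+// Usage sketch (mirrors the printer pass below; illustrative only): from a
+// loop pass with LoopStandardAnalysisResults AR available:
+//   DependenceInfo DI(L.getHeader()->getParent(), &AR.AA, &AR.SE, &AR.LI);
+//   if (auto CC = CacheCost::getCacheCost(L, AR, DI))
+//     dbgs() << *CC;   // prints the estimated cost of each loop in the nest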
+
+void CacheCost::calculateCacheFootprint() {
+ LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
+ ReferenceGroupsTy RefGroups;
+ if (!populateReferenceGroups(RefGroups))
+ return;
+
+ LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
+ for (const Loop *L : Loops) {
+ assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
+ [L](const LoopCacheCostTy &LCC) {
+ return LCC.first == L;
+ }) == LoopCosts.end()) &&
+ "Should not add duplicate element");
+ CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
+ LoopCosts.push_back(std::make_pair(L, LoopCost));
+ }
+
+ sortLoopCosts();
+ RefGroups.clear();
+}
+
+bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
+ assert(RefGroups.empty() && "Reference groups should be empty");
+
+ unsigned CLS = TTI.getCacheLineSize();
+ Loop *InnerMostLoop = getInnerMostLoop(Loops);
+ assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
+
+ for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
+ for (Instruction &I : *BB) {
+ if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
+ continue;
+
+ std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
+ if (!R->isValid())
+ continue;
+
+ bool Added = false;
+ for (ReferenceGroupTy &RefGroup : RefGroups) {
+ const IndexedReference &Representative = *RefGroup.front().get();
+ LLVM_DEBUG({
+ dbgs() << "References:\n";
+ dbgs().indent(2) << *R << "\n";
+ dbgs().indent(2) << Representative << "\n";
+ });
+
+ Optional<bool> HasTemporalReuse =
+ R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
+ Optional<bool> HasSpacialReuse =
+ R->hasSpacialReuse(Representative, CLS, AA);
+
+ if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
+ (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
+ RefGroup.push_back(std::move(R));
+ Added = true;
+ break;
+ }
+ }
+
+ if (!Added) {
+ ReferenceGroupTy RG;
+ RG.push_back(std::move(R));
+ RefGroups.push_back(std::move(RG));
+ }
+ }
+ }
+
+ if (RefGroups.empty())
+ return false;
+
+ LLVM_DEBUG({
+ dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
+ int n = 1;
+ for (const ReferenceGroupTy &RG : RefGroups) {
+ dbgs().indent(2) << "RefGroup " << n << ":\n";
+ for (const auto &IR : RG)
+ dbgs().indent(4) << *IR << "\n";
+ n++;
+ }
+ dbgs() << "\n";
+ });
+
+ return true;
+}
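+
+// Illustrative example (not part of this patch): for an innermost loop body
+//   A[i][j] = A[i][j] + A[i][j + 1] + A[i + 1][j];
+// A[i][j] and A[i][j + 1] land in one reference group (spatial reuse within
+// a cache line), while A[i + 1][j] starts a group of its own: its distance
+// vector (1, 0) w.r.t. A[i][j] is nonzero at the outer 'i' level, so there
+// is neither temporal nor spatial reuse with the group representative.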
+
+CacheCostTy
+CacheCost::computeLoopCacheCost(const Loop &L,
+ const ReferenceGroupsTy &RefGroups) const {
+ if (!L.isLoopSimplifyForm())
+ return InvalidCost;
+
+ LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
+ << "' as innermost loop.\n");
+
+ // Compute the product of the trip counts of each other loop in the nest.
+ CacheCostTy TripCountsProduct = 1;
+ for (const auto &TC : TripCounts) {
+ if (TC.first == &L)
+ continue;
+ TripCountsProduct *= TC.second;
+ }
+
+ CacheCostTy LoopCost = 0;
+ for (const ReferenceGroupTy &RG : RefGroups) {
+ CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
+ LoopCost += RefGroupCost * TripCountsProduct;
+ }
+
+ LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
+ << "' has cost=" << LoopCost << "\n");
+
+ return LoopCost;
+}
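+
+// Worked example (illustrative, not part of this patch): for a two-deep
+// nest with TripCounts {(i, 100), (j, 100)} and a single reference group
+// costing 6 w.r.t. the 'j' loop, considering 'j' as innermost gives
+// LoopCost == 6 * 100, the trip count product of all other loops in the
+// nest.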
+
+CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
+ const Loop &L) const {
+ assert(!RG.empty() && "Reference group should have at least one member.");
+
+ const IndexedReference *Representative = RG.front().get();
+ return Representative->computeRefCost(L, TTI.getCacheLineSize());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopCachePrinterPass implementation
+//
+PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ Function *F = L.getHeader()->getParent();
+ DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
+
+ if (auto CC = CacheCost::getCacheCost(L, AR, DI))
+ OS << *CC;
+
+ return PreservedAnalyses::all();
+}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index aa5da0859805..dbab5db7dbc2 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -359,6 +359,45 @@ bool Loop::isAuxiliaryInductionVariable(PHINode &AuxIndVar,
return SE.isLoopInvariant(IndDesc.getStep(), this);
}
+BranchInst *Loop::getLoopGuardBranch() const {
+ if (!isLoopSimplifyForm())
+ return nullptr;
+
+ BasicBlock *Preheader = getLoopPreheader();
+ BasicBlock *Latch = getLoopLatch();
+ assert(Preheader && Latch &&
+ "Expecting a loop with valid preheader and latch");
+
+ // Loop should be in rotate form.
+ if (!isLoopExiting(Latch))
+ return nullptr;
+
+ // Disallow loops with more than one unique exit block, as we do not verify
+  // that GuardOtherSucc post-dominates all exit blocks.
+ BasicBlock *ExitFromLatch = getUniqueExitBlock();
+ if (!ExitFromLatch)
+ return nullptr;
+
+ BasicBlock *ExitFromLatchSucc = ExitFromLatch->getUniqueSuccessor();
+ if (!ExitFromLatchSucc)
+ return nullptr;
+
+ BasicBlock *GuardBB = Preheader->getUniquePredecessor();
+ if (!GuardBB)
+ return nullptr;
+
+ assert(GuardBB->getTerminator() && "Expecting valid guard terminator");
+
+ BranchInst *GuardBI = dyn_cast<BranchInst>(GuardBB->getTerminator());
+ if (!GuardBI || GuardBI->isUnconditional())
+ return nullptr;
+
+ BasicBlock *GuardOtherSucc = (GuardBI->getSuccessor(0) == Preheader)
+ ? GuardBI->getSuccessor(1)
+ : GuardBI->getSuccessor(0);
+ return (GuardOtherSucc == ExitFromLatchSucc) ? GuardBI : nullptr;
+}
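+
+// Illustrative CFG sketch (not part of this patch):
+//
+//   GuardBB:       br %cond, label %Preheader, label %Succ
+//   Preheader:     ...            ; loop preheader
+//   <rotated loop whose latch exits to ExitFromLatch>
+//   ExitFromLatch: br label %Succ
+//
+// The conditional branch in GuardBB is the guard branch: its non-preheader
+// successor is the unique successor of the loop's unique exit block.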
+
bool Loop::isCanonical(ScalarEvolution &SE) const {
InductionDescriptor IndDesc;
if (!getInductionDescriptor(SE, IndDesc))
diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp
index 1728b5e9f6d2..762623de41e9 100644
--- a/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -78,7 +78,7 @@ bool UnrolledInstAnalyzer::visitBinaryOperator(BinaryOperator &I) {
const DataLayout &DL = I.getModule()->getDataLayout();
if (auto FI = dyn_cast<FPMathOperator>(&I))
SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ SimplifyBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
else
SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index 77ebf89d9a08..5cf516a538b5 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -55,8 +55,8 @@ bool MemDerefPrinter::runOnFunction(Function &F) {
Value *PO = LI->getPointerOperand();
if (isDereferenceablePointer(PO, LI->getType(), DL))
Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(),
- LI->getAlignment(), DL))
+ if (isDereferenceableAndAlignedPointer(
+ PO, LI->getType(), MaybeAlign(LI->getAlignment()), DL))
DerefAndAligned.insert(PO);
}
}
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 729dad463657..172c86eb4646 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -180,6 +180,19 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
return None;
}
+static Optional<AllocFnsTy>
+getAllocationData(const Value *V, AllocType AllocTy,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false) {
+ bool IsNoBuiltinCall;
+ if (const Function *Callee =
+ getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall))
+ if (!IsNoBuiltinCall)
+ return getAllocationDataForFunction(
+ Callee, AllocTy, &GetTLI(const_cast<Function &>(*Callee)));
+ return None;
+}
+
static Optional<AllocFnsTy> getAllocationSize(const Value *V,
const TargetLibraryInfo *TLI) {
bool IsNoBuiltinCall;
@@ -223,6 +236,11 @@ bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, AnyAlloc, TLI, LookThroughBitCast).hasValue();
}
+bool llvm::isAllocationFn(
+ const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, AnyAlloc, GetTLI, LookThroughBitCast).hasValue();
+}
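+
+// Usage sketch (hypothetical caller, not part of this patch): passes that
+// only have per-function TLI available can supply a callback, e.g.:
+//   auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
+//     return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+//   };
+//   if (isAllocationFn(V, GetTLI)) { ... }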
/// Tests if a value is a call or invoke to a function that returns a
/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
@@ -240,6 +258,12 @@ bool llvm::isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast) {
return getAllocationData(V, MallocLike, TLI, LookThroughBitCast).hasValue();
}
+bool llvm::isMallocLikeFn(
+ const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, MallocLike, GetTLI, LookThroughBitCast)
+ .hasValue();
+}
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
@@ -276,12 +300,27 @@ bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
}
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory and throws if an allocation failed (e.g., new).
+bool llvm::isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast).hasValue();
+}
+
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory (strdup, strndup).
+bool llvm::isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, StrDupLike, TLI, LookThroughBitCast).hasValue();
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
-const CallInst *llvm::extractMallocCall(const Value *I,
- const TargetLibraryInfo *TLI) {
- return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
+const CallInst *llvm::extractMallocCall(
+ const Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ return isMallocLikeFn(I, GetTLI) ? dyn_cast<CallInst>(I) : nullptr;
}
static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
@@ -521,9 +560,9 @@ STATISTIC(ObjectVisitorArgument,
STATISTIC(ObjectVisitorLoad,
"Number of load instructions with unsolved size and offset");
-APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
- if (Options.RoundToAlign && Align)
- return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align));
+APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Alignment) {
+ if (Options.RoundToAlign && Alignment)
+ return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align(Alignment)));
return Size;
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index b25b655165d7..884587e020bb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -183,7 +183,7 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
BasicBlock *BB) {
- unsigned Limit = BlockScanLimit;
+ unsigned Limit = getDefaultBlockScanLimit();
// Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
@@ -356,7 +356,7 @@ MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
BasicBlock *BB) {
- if (!LI->getMetadata(LLVMContext::MD_invariant_group))
+ if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
return MemDepResult::getUnknown();
// Take the ptr operand after all casts and geps 0. This way we can search
@@ -417,7 +417,7 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
- U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
+ U->hasMetadata(LLVMContext::MD_invariant_group))
ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
}
@@ -443,7 +443,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
OrderedBasicBlock *OBB) {
bool isInvariantLoad = false;
- unsigned DefaultLimit = BlockScanLimit;
+ unsigned DefaultLimit = getDefaultBlockScanLimit();
if (!Limit)
Limit = &DefaultLimit;
@@ -481,7 +481,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Arguably, this logic should be pushed inside AliasAnalysis itself.
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
- if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
isInvariantLoad = true;
}
@@ -1746,6 +1746,9 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
AnalysisKey MemoryDependenceAnalysis::Key;
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+ : DefaultBlockScanLimit(BlockScanLimit) {}
+
MemoryDependenceResults
MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
@@ -1753,7 +1756,7 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &PV = AM.getResult<PhiValuesAnalysis>(F);
- return MemoryDependenceResults(AA, AC, TLI, DT, PV);
+ return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit);
}
char MemoryDependenceWrapperPass::ID = 0;
@@ -1807,15 +1810,15 @@ bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &P
}
unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
- return BlockScanLimit;
+ return DefaultBlockScanLimit;
}
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
- MemDep.emplace(AA, AC, TLI, DT, PV);
+ MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
return false;
}
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 17f5d9b9f0ad..cfb8b7e7dcb5 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -49,6 +49,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
+#include <cstdlib>
#include <iterator>
#include <memory>
#include <utility>
@@ -83,7 +84,7 @@ bool llvm::VerifyMemorySSA = false;
#endif
/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
cl::opt<bool> llvm::EnableMSSALoopDependency(
- "enable-mssa-loop-dependency", cl::Hidden, cl::init(false),
+ "enable-mssa-loop-dependency", cl::Hidden, cl::init(true),
cl::desc("Enable MemorySSA dependency for loop pass manager"));
static cl::opt<bool, true>
@@ -284,6 +285,11 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
case Intrinsic::invariant_end:
case Intrinsic::assume:
return {false, NoAlias};
+ case Intrinsic::dbg_addr:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_label:
+ case Intrinsic::dbg_value:
+ llvm_unreachable("debuginfo shouldn't have associated defs!");
default:
break;
}
@@ -369,7 +375,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
- return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
+ return isa<LoadInst>(I) && (I->hasMetadata(LLVMContext::MD_invariant_load) ||
AA.pointsToConstantMemory(MemoryLocation(
cast<LoadInst>(I)->getPointerOperand())));
}
@@ -867,6 +873,7 @@ template <class AliasAnalysisType> class ClobberWalker {
if (!DefChainEnd)
for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target)))
DefChainEnd = MA;
+ assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry");
// If any of the terminated paths don't dominate the phi we'll try to
// optimize, we need to figure out what they are and quit.
@@ -1087,9 +1094,14 @@ void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
AccessList *Accesses = It->second.get();
auto *Phi = cast<MemoryPhi>(&Accesses->front());
if (RenameAllUses) {
- int PhiIndex = Phi->getBasicBlockIndex(BB);
- assert(PhiIndex != -1 && "Incomplete phi during partial rename");
- Phi->setIncomingValue(PhiIndex, IncomingVal);
+ bool ReplacementDone = false;
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
+ if (Phi->getIncomingBlock(I) == BB) {
+ Phi->setIncomingValue(I, IncomingVal);
+ ReplacementDone = true;
+ }
+ (void) ReplacementDone;
+ assert(ReplacementDone && "Incomplete phi during partial rename");
} else
Phi->addIncoming(IncomingVal, BB);
}
@@ -1237,7 +1249,7 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
if (Res.second)
- Res.first->second = llvm::make_unique<AccessList>();
+ Res.first->second = std::make_unique<AccessList>();
return Res.first->second.get();
}
@@ -1245,7 +1257,7 @@ MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
if (Res.second)
- Res.first->second = llvm::make_unique<DefsList>();
+ Res.first->second = std::make_unique<DefsList>();
return Res.first->second.get();
}
@@ -1554,10 +1566,10 @@ MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
if (!WalkerBase)
WalkerBase =
- llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
+ std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
Walker =
- llvm::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
+ std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
return Walker.get();
}
@@ -1567,10 +1579,10 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
if (!WalkerBase)
WalkerBase =
- llvm::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
+ std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);
SkipWalker =
- llvm::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
+ std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
return SkipWalker.get();
}
@@ -1687,13 +1699,15 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
MemoryAccess *Definition,
- const MemoryUseOrDef *Template) {
+ const MemoryUseOrDef *Template,
+ bool CreationMustSucceed) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template);
- assert(
- NewAccess != nullptr &&
- "Tried to create a memory access for a non-memory touching instruction");
- NewAccess->setDefiningAccess(Definition);
+ if (CreationMustSucceed)
+ assert(NewAccess != nullptr && "Tried to create a memory access for a "
+ "non-memory touching instruction");
+ if (NewAccess)
+ NewAccess->setDefiningAccess(Definition);
return NewAccess;
}
@@ -1717,13 +1731,21 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
AliasAnalysisType *AAP,
const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
- // that it writes arbitrarily. Ignore that fake memory dependency here.
+ // that it writes arbitrarily. Debuginfo intrinsics may be considered
+ // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory
+ // dependencies here.
// FIXME: Replace this special casing with a more accurate modelling of
// assume's control dependency.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
if (II->getIntrinsicID() == Intrinsic::assume)
return nullptr;
+ // Using a nonstandard AA pipelines might leave us with unexpected modref
+ // results for I, so add a check to not model instructions that may not read
+ // from or write to memory. This is necessary for correctness.
+ if (!I->mayReadFromMemory() && !I->mayWriteToMemory())
+ return nullptr;
+
bool Def, Use;
if (Template) {
Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
@@ -1850,6 +1872,7 @@ void MemorySSA::verifyMemorySSA() const {
verifyDomination(F);
verifyOrdering(F);
verifyDominationNumbers(F);
+ verifyPrevDefInPhis(F);
// Previously, the verification used to also verify that the clobberingAccess
// cached by MemorySSA is the same as the clobberingAccess found at a later
// query to AA. This does not hold true in general due to the current fragility
@@ -1862,6 +1885,40 @@ void MemorySSA::verifyMemorySSA() const {
// example, see test4 added in D51960.
}
+void MemorySSA::verifyPrevDefInPhis(Function &F) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ for (const BasicBlock &BB : F) {
+ if (MemoryPhi *Phi = getMemoryAccess(&BB)) {
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ auto *Pred = Phi->getIncomingBlock(I);
+ auto *IncAcc = Phi->getIncomingValue(I);
+ // If Pred has no unreachable predecessors, get last def looking at
+        // IDoms. If, while walking IDoms, any of these has an unreachable
+ // predecessor, then the incoming def can be any access.
+ if (auto *DTNode = DT->getNode(Pred)) {
+ while (DTNode) {
+ if (auto *DefList = getBlockDefs(DTNode->getBlock())) {
+ auto *LastAcc = &*(--DefList->end());
+ assert(LastAcc == IncAcc &&
+ "Incorrect incoming access into phi.");
+ break;
+ }
+ DTNode = DTNode->getIDom();
+ }
+ } else {
+        // If Pred has unreachable predecessors, but has at least one Def, the
+ // incoming access can be the last Def in Pred, or it could have been
+ // optimized to LoE. After an update, though, the LoE may have been
+ // replaced by another access, so IncAcc may be any access.
+ // If Pred has unreachable predecessors and no Defs, incoming access
+ // should be LoE; However, after an update, it may be any access.
+ }
+ }
+ }
+ }
+#endif
+}
+
/// Verify that all of the blocks we believe to have valid domination numbers
/// actually have valid domination numbers.
void MemorySSA::verifyDominationNumbers(const Function &F) const {
@@ -2005,7 +2062,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
for (BasicBlock &B : F) {
// Phi nodes are attached to basic blocks
if (MemoryPhi *Phi = getMemoryAccess(&B)) {
@@ -2212,7 +2269,7 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
+ return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(F, &AA, &DT));
}
bool MemorySSAAnalysis::Result::invalidate(
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index 4c1feee7fd9a..f2d56b05d968 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -44,11 +44,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// First, do a cache lookup. Without this cache, certain CFG structures
// (like a series of if statements) take exponential time to visit.
auto Cached = CachedPreviousDef.find(BB);
- if (Cached != CachedPreviousDef.end()) {
+ if (Cached != CachedPreviousDef.end())
return Cached->second;
- }
- if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ // If this method is called from an unreachable block, return LoE.
+ if (!MSSA->DT->isReachableFromEntry(BB))
+ return MSSA->getLiveOnEntryDef();
+
+ if (BasicBlock *Pred = BB->getUniquePredecessor()) {
+ VisitedBlocks.insert(BB);
// Single predecessor case, just recurse, we can only have one definition.
MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
CachedPreviousDef.insert({BB, Result});
@@ -71,11 +75,19 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// Recurse to get the values in our predecessors for placement of a
// potential phi node. This will insert phi nodes if we cycle in order to
// break the cycle and have an operand.
- for (auto *Pred : predecessors(BB))
- if (MSSA->DT->isReachableFromEntry(Pred))
- PhiOps.push_back(getPreviousDefFromEnd(Pred, CachedPreviousDef));
- else
+ bool UniqueIncomingAccess = true;
+ MemoryAccess *SingleAccess = nullptr;
+ for (auto *Pred : predecessors(BB)) {
+ if (MSSA->DT->isReachableFromEntry(Pred)) {
+ auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef);
+ if (!SingleAccess)
+ SingleAccess = IncomingAccess;
+ else if (IncomingAccess != SingleAccess)
+ UniqueIncomingAccess = false;
+ PhiOps.push_back(IncomingAccess);
+ } else
PhiOps.push_back(MSSA->getLiveOnEntryDef());
+ }
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we never created a phi yet, that's okay
@@ -84,7 +96,15 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// See if we can avoid the phi by simplifying it.
auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
// If we couldn't simplify, we may have to create a phi
- if (Result == Phi) {
+ if (Result == Phi && UniqueIncomingAccess && SingleAccess) {
+ // A concrete Phi only exists if we created an empty one to break a cycle.
+ if (Phi) {
+ assert(Phi->operands().empty() && "Expected empty Phi");
+ Phi->replaceAllUsesWith(SingleAccess);
+ removeMemoryAccess(Phi);
+ }
+ Result = SingleAccess;
+ } else if (Result == Phi && !(UniqueIncomingAccess && SingleAccess)) {
if (!Phi)
Phi = MSSA->createMemoryPhi(BB);
@@ -173,12 +193,9 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
TrackingVH<MemoryAccess> Res(Phi);
SmallVector<TrackingVH<Value>, 8> Uses;
std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses));
- for (auto &U : Uses) {
- if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) {
- auto OperRange = UsePhi->operands();
- tryRemoveTrivialPhi(UsePhi, OperRange);
- }
- }
+ for (auto &U : Uses)
+ if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U))
+ tryRemoveTrivialPhi(UsePhi);
return Res;
}
@@ -187,6 +204,11 @@ MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
// argument.
// IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c)
// We recursively try to remove them.
+MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) {
+ assert(Phi && "Can only remove concrete Phi.");
+ auto OperRange = Phi->operands();
+ return tryRemoveTrivialPhi(Phi, OperRange);
+}
template <class RangeType>
MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
RangeType &Operands) {
@@ -218,17 +240,49 @@ MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
return recursePhi(Same);
}
-void MemorySSAUpdater::insertUse(MemoryUse *MU) {
+void MemorySSAUpdater::insertUse(MemoryUse *MU, bool RenameUses) {
InsertedPHIs.clear();
MU->setDefiningAccess(getPreviousDef(MU));
- // Unlike for defs, there is no extra work to do. Because uses do not create
- // new may-defs, there are only two cases:
- //
+
+ // In cases without unreachable blocks, because uses do not create new
+ // may-defs, there are only two cases:
// 1. There was a def already below us, and therefore, we should not have
// created a phi node because it was already needed for the def.
//
// 2. There is no def below us, and therefore, there is no extra renaming work
// to do.
+
+ // In cases with unreachable blocks, where the unnecessary Phis were
+ // optimized out, adding the Use may re-insert those Phis. Hence, when
+  // inserting Uses outside of the MSSA creation process and new Phis were
+  // added, rename all uses if requested.
+
+ if (!RenameUses && !InsertedPHIs.empty()) {
+ auto *Defs = MSSA->getBlockDefs(MU->getBlock());
+ (void)Defs;
+ assert((!Defs || (++Defs->begin() == Defs->end())) &&
+ "Block may have only a Phi or no defs");
+ }
+
+ if (RenameUses && InsertedPHIs.size()) {
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ BasicBlock *StartBlock = MU->getBlock();
+
+ if (auto *Defs = MSSA->getWritableBlockDefs(StartBlock)) {
+ MemoryAccess *FirstDef = &*Defs->begin();
+ // Convert to incoming value if it's a memorydef. A phi *is* already an
+ // incoming value.
+ if (auto *MD = dyn_cast<MemoryDef>(FirstDef))
+ FirstDef = MD->getDefiningAccess();
+
+ MSSA->renamePass(MU->getBlock(), FirstDef, Visited);
+ }
+ // We just inserted a phi into this block, so the incoming value will
+ // become the phi anyway, so it does not matter what we pass.
+ for (auto &MP : InsertedPHIs)
+ if (MemoryPhi *Phi = cast_or_null<MemoryPhi>(MP))
+ MSSA->renamePass(Phi->getBlock(), nullptr, Visited);
+ }
}
// Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef.
@@ -260,33 +314,35 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// See if we had a local def, and if not, go hunting.
MemoryAccess *DefBefore = getPreviousDef(MD);
- bool DefBeforeSameBlock = DefBefore->getBlock() == MD->getBlock();
+ bool DefBeforeSameBlock = false;
+ if (DefBefore->getBlock() == MD->getBlock() &&
+ !(isa<MemoryPhi>(DefBefore) &&
+ std::find(InsertedPHIs.begin(), InsertedPHIs.end(), DefBefore) !=
+ InsertedPHIs.end()))
+ DefBeforeSameBlock = true;
// There is a def before us, which means we can replace any store/phi uses
// of that thing with us, since we are in the way of whatever was there
// before.
// We now define that def's memorydefs and memoryphis
if (DefBeforeSameBlock) {
- for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ DefBefore->replaceUsesWithIf(MD, [MD](Use &U) {
// Leave the MemoryUses alone.
// Also make sure we skip ourselves to avoid self references.
- if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
- continue;
+ User *Usr = U.getUser();
+ return !isa<MemoryUse>(Usr) && Usr != MD;
// Defs are automatically unoptimized when the user is set to MD below,
// because the isOptimized() call will fail to find the same ID.
- U.set(MD);
- }
+ });
}
// and that def is now our defining access.
MD->setDefiningAccess(DefBefore);
- // Remember the index where we may insert new phis below.
- unsigned NewPhiIndex = InsertedPHIs.size();
-
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
+
+ // Remember the index where we may insert new phis.
+ unsigned NewPhiIndex = InsertedPHIs.size();
if (!DefBeforeSameBlock) {
// If there was a local def before us, we must have the same effect it
// did. Because every may-def is the same, any phis/etc we would create, it
@@ -302,46 +358,54 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
// If this is the first def in the block and this insert is in an arbitrary
// place, compute IDF and place phis.
+ SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+
+ // If this is the last Def in the block, also compute IDF based on MD, since
+  // this may be a newly added Def, and we may need additional Phis.
auto Iter = MD->getDefsIterator();
++Iter;
auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end();
- if (Iter == IterEnd) {
- ForwardIDFCalculator IDFs(*MSSA->DT);
- SmallVector<BasicBlock *, 32> IDFBlocks;
- SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
+ if (Iter == IterEnd)
DefiningBlocks.insert(MD->getBlock());
- IDFs.setDefiningBlocks(DefiningBlocks);
- IDFs.calculate(IDFBlocks);
- SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
- for (auto *BBIDF : IDFBlocks)
- if (!MSSA->getMemoryAccess(BBIDF)) {
- auto *MPhi = MSSA->createMemoryPhi(BBIDF);
- NewInsertedPHIs.push_back(MPhi);
- // Add the phis created into the IDF blocks to NonOptPhis, so they are
- // not optimized out as trivial by the call to getPreviousDefFromEnd
- // below. Once they are complete, all these Phis are added to the
- // FixupList, and removed from NonOptPhis inside fixupDefs().
- NonOptPhis.insert(MPhi);
- }
- for (auto &MPhi : NewInsertedPHIs) {
- auto *BBIDF = MPhi->getBlock();
- for (auto *Pred : predecessors(BBIDF)) {
- DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
- MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef),
- Pred);
- }
+ for (const auto &VH : InsertedPHIs)
+ if (const auto *RealPHI = cast_or_null<MemoryPhi>(VH))
+ DefiningBlocks.insert(RealPHI->getBlock());
+ ForwardIDFCalculator IDFs(*MSSA->DT);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
+ for (auto *BBIDF : IDFBlocks) {
+ auto *MPhi = MSSA->getMemoryAccess(BBIDF);
+ if (!MPhi) {
+ MPhi = MSSA->createMemoryPhi(BBIDF);
+ NewInsertedPHIs.push_back(MPhi);
}
-
- // Re-take the index where we're adding the new phis, because the above
- // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs.
- NewPhiIndex = InsertedPHIs.size();
- for (auto &MPhi : NewInsertedPHIs) {
- InsertedPHIs.push_back(&*MPhi);
- FixupList.push_back(&*MPhi);
+ // Add the phis created into the IDF blocks to NonOptPhis, so they are not
+ // optimized out as trivial by the call to getPreviousDefFromEnd below.
+ // Once they are complete, all these Phis are added to the FixupList, and
+    // removed from NonOptPhis inside fixupDefs(). Existing Phis in the IDF
+    // may need fixing as well, and may have been trivial before this
+    // insertion; hence add all IDF Phis. See PR43044.
+ NonOptPhis.insert(MPhi);
+ }
+ for (auto &MPhi : NewInsertedPHIs) {
+ auto *BBIDF = MPhi->getBlock();
+ for (auto *Pred : predecessors(BBIDF)) {
+ DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
+ MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred);
}
}
+ // Re-take the index where we're adding the new phis, because the above call
+ // to getPreviousDefFromEnd, may have inserted into InsertedPHIs.
+ NewPhiIndex = InsertedPHIs.size();
+ for (auto &MPhi : NewInsertedPHIs) {
+ InsertedPHIs.push_back(&*MPhi);
+ FixupList.push_back(&*MPhi);
+ }
+
FixupList.push_back(MD);
}
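
The IDF-driven phi placement above is the classic SSA construction recipe applied to MemorySSA. A minimal standalone sketch of the same pattern, assuming only a valid DominatorTree and a set of blocks holding new definitions (the function name and parameters are illustrative):

#include "llvm/Analysis/IteratedDominanceFrontier.h"

// Sketch: compute the blocks that need a new MemoryPhi for a set of
// defining blocks, exactly as insertDef does above.
static void collectPhiBlocks(
    llvm::DominatorTree &DT,
    const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &DefiningBlocks,
    llvm::SmallVectorImpl<llvm::BasicBlock *> &PhiBlocks) {
  llvm::ForwardIDFCalculator IDFs(DT);
  IDFs.setDefiningBlocks(DefiningBlocks);
  IDFs.calculate(PhiBlocks); // PhiBlocks now holds the IDF of DefiningBlocks.
}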
@@ -458,8 +522,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
MPhi->unorderedDeleteIncomingBlock(From);
- if (MPhi->getNumIncomingValues() == 1)
- removeMemoryAccess(MPhi);
+ tryRemoveTrivialPhi(MPhi);
}
}
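
This same replacement, dropping the manual getNumIncomingValues() == 1 check, recurs throughout the file. The single-argument tryRemoveTrivialPhi the patch calls is presumably a thin wrapper over the existing range-based template; a sketch of that assumed shape:

// Sketch (assumed wrapper, not the verbatim implementation): forward the
// phi's own operand range to the range-based helper, which removes the phi
// if all incoming values are identical.
MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi) {
  auto OperRange = Phi->operands();
  return tryRemoveTrivialPhi(Phi, OperRange);
}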
@@ -475,34 +538,51 @@ void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(const BasicBlock *From,
Found = true;
return false;
});
- if (MPhi->getNumIncomingValues() == 1)
- removeMemoryAccess(MPhi);
+ tryRemoveTrivialPhi(MPhi);
+ }
+}
+
+static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
+ const ValueToValueMapTy &VMap,
+ PhiToDefMap &MPhiMap,
+ bool CloneWasSimplified,
+ MemorySSA *MSSA) {
+ MemoryAccess *InsnDefining = MA;
+ if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) {
+ if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+ Instruction *DefMUDI = DefMUD->getMemoryInst();
+ assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewDefMUDI =
+ cast_or_null<Instruction>(VMap.lookup(DefMUDI))) {
+ InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+ if (!CloneWasSimplified)
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ else if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
+ // The clone was simplified, it's no longer a MemoryDef, look up.
+ auto DefIt = DefMUD->getDefsIterator();
+ // Since simplified clones only occur in single block cloning, a
+ // previous definition must exist, otherwise NewDefMUDI would not
+ // have been found in VMap.
+ assert(DefIt != MSSA->getBlockDefs(DefMUD->getBlock())->begin() &&
+ "Previous def must exist");
+ InsnDefining = getNewDefiningAccessForClone(
+ &*(--DefIt), VMap, MPhiMap, CloneWasSimplified, MSSA);
+ }
+ }
+ }
+ } else {
+ MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+ InsnDefining = NewDefPhi;
}
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ return InsnDefining;
}
void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
const ValueToValueMapTy &VMap,
PhiToDefMap &MPhiMap,
bool CloneWasSimplified) {
- auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
- MemoryAccess *InsnDefining = MA;
- if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
- if (!MSSA->isLiveOnEntryDef(DefMUD)) {
- Instruction *DefMUDI = DefMUD->getMemoryInst();
- assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
- if (Instruction *NewDefMUDI =
- cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
- InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
- }
- } else {
- MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
- if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
- InsnDefining = NewDefPhi;
- }
- assert(InsnDefining && "Defining instruction cannot be nullptr.");
- return InsnDefining;
- };
-
const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
if (!Acc)
return;
@@ -519,9 +599,13 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
if (Instruction *NewInsn =
dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
- NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()),
- CloneWasSimplified ? nullptr : MUD);
- MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+ NewInsn,
+ getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap,
+ MPhiMap, CloneWasSimplified, MSSA),
+ /*Template=*/CloneWasSimplified ? nullptr : MUD,
+ /*CreationMustSucceed=*/CloneWasSimplified ? false : true);
+ if (NewUseOrDef)
+ MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
}
}
}
@@ -563,8 +647,7 @@ void MemorySSAUpdater::updatePhisWhenInsertingUniqueBackedgeBlock(
// If NewMPhi is a trivial phi, remove it. Its use in the header MPhi will be
// replaced with the unique value.
- if (HasUniqueIncomingValue)
- removeMemoryAccess(NewMPhi);
+ tryRemoveTrivialPhi(NewMPhi);
}
void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
@@ -770,6 +853,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
} else {
// Single predecessor, BB cannot be dead. GetLastDef of Pred.
assert(Count == 1 && Pred && "Single predecessor expected.");
+      // BB can be unreachable, though; return LoE if that is the case.
+ if (!DT.getNode(BB))
+ return MSSA->getLiveOnEntryDef();
BB = Pred;
}
};
@@ -1010,7 +1096,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
for (; UI != E;) {
Use &U = *UI;
++UI;
- MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+ MemoryAccess *Usr = cast<MemoryAccess>(U.getUser());
if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
if (!DT.dominates(DominatingBlock, DominatedBlock))
@@ -1052,9 +1138,9 @@ void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
// Now reinsert it into the IR and do whatever fixups needed.
if (auto *MD = dyn_cast<MemoryDef>(What))
- insertDef(MD);
+ insertDef(MD, /*RenameUses=*/true);
else
- insertUse(cast<MemoryUse>(What));
+ insertUse(cast<MemoryUse>(What), /*RenameUses=*/true);
// Clear dangling pointers. We added all MemoryPhi users, but not all
// of them are removed by fixupDefs().
@@ -1084,25 +1170,32 @@ void MemorySSAUpdater::moveAllAccesses(BasicBlock *From, BasicBlock *To,
if (!Accs)
return;
+ assert(Start->getParent() == To && "Incorrect Start instruction");
MemoryAccess *FirstInNew = nullptr;
for (Instruction &I : make_range(Start->getIterator(), To->end()))
if ((FirstInNew = MSSA->getMemoryAccess(&I)))
break;
- if (!FirstInNew)
- return;
+ if (FirstInNew) {
+ auto *MUD = cast<MemoryUseOrDef>(FirstInNew);
+ do {
+ auto NextIt = ++MUD->getIterator();
+ MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end())
+ ? nullptr
+ : cast<MemoryUseOrDef>(&*NextIt);
+ MSSA->moveTo(MUD, To, MemorySSA::End);
+      // Moving MUD from Accs in the moveTo above may delete Accs, so we
+      // need to retrieve it again.
+ Accs = MSSA->getWritableBlockAccesses(From);
+ MUD = NextMUD;
+ } while (MUD);
+ }
- auto *MUD = cast<MemoryUseOrDef>(FirstInNew);
- do {
- auto NextIt = ++MUD->getIterator();
- MemoryUseOrDef *NextMUD = (!Accs || NextIt == Accs->end())
- ? nullptr
- : cast<MemoryUseOrDef>(&*NextIt);
- MSSA->moveTo(MUD, To, MemorySSA::End);
- // Moving MUD from Accs in the moveTo above, may delete Accs, so we need to
- // retrieve it again.
- Accs = MSSA->getWritableBlockAccesses(From);
- MUD = NextMUD;
- } while (MUD);
+ // If all accesses were moved and only a trivial Phi remains, we try to remove
+ // that Phi. This is needed when From is going to be deleted.
+ auto *Defs = MSSA->getWritableBlockDefs(From);
+ if (Defs && !Defs->empty())
+ if (auto *Phi = dyn_cast<MemoryPhi>(&*Defs->begin()))
+ tryRemoveTrivialPhi(Phi);
}
void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From,
@@ -1118,7 +1211,7 @@ void MemorySSAUpdater::moveAllAfterSpliceBlocks(BasicBlock *From,
void MemorySSAUpdater::moveAllAfterMergeBlocks(BasicBlock *From, BasicBlock *To,
Instruction *Start) {
- assert(From->getSinglePredecessor() == To &&
+ assert(From->getUniquePredecessor() == To &&
"From block is expected to have a single predecessor (To).");
moveAllAccesses(From, To, Start);
for (BasicBlock *Succ : successors(From))
@@ -1173,8 +1266,7 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
return false;
});
Phi->addIncoming(NewPhi, New);
- if (onlySingleValue(NewPhi))
- removeMemoryAccess(NewPhi);
+ tryRemoveTrivialPhi(NewPhi);
}
}
@@ -1239,10 +1331,8 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
unsigned PhisSize = PhisToOptimize.size();
while (PhisSize-- > 0)
if (MemoryPhi *MP =
- cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val())) {
- auto OperRange = MP->operands();
- tryRemoveTrivialPhi(MP, OperRange);
- }
+ cast_or_null<MemoryPhi>(PhisToOptimize.pop_back_val()))
+ tryRemoveTrivialPhi(MP);
}
}
@@ -1256,8 +1346,7 @@ void MemorySSAUpdater::removeBlocks(
if (!DeadBlocks.count(Succ))
if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) {
MP->unorderedDeleteIncomingBlock(BB);
- if (MP->getNumIncomingValues() == 1)
- removeMemoryAccess(MP);
+ tryRemoveTrivialPhi(MP);
}
// Drop all references of all accesses in BB
if (MemorySSA::AccessList *Acc = MSSA->getWritableBlockAccesses(BB))
@@ -1281,10 +1370,8 @@ void MemorySSAUpdater::removeBlocks(
void MemorySSAUpdater::tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs) {
for (auto &VH : UpdatedPHIs)
- if (auto *MPhi = cast_or_null<MemoryPhi>(VH)) {
- auto OperRange = MPhi->operands();
- tryRemoveTrivialPhi(MPhi, OperRange);
- }
+ if (auto *MPhi = cast_or_null<MemoryPhi>(VH))
+ tryRemoveTrivialPhi(MPhi);
}
void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index e25eb290a665..8232bf07cafc 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -319,7 +319,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
auto *CalledValue = CS.getCalledValue();
auto *CalledFunction = CS.getCalledFunction();
if (CalledValue && !CalledFunction) {
- CalledValue = CalledValue->stripPointerCastsNoFollowAliases();
+ CalledValue = CalledValue->stripPointerCasts();
// Stripping pointer casts can reveal a called function.
CalledFunction = dyn_cast<Function>(CalledValue);
}
@@ -467,7 +467,7 @@ static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// FIXME: refactor this to use the same code that inliner is using.
// Don't try to import functions with noinline attribute.
F.getAttributes().hasFnAttribute(Attribute::NoInline)};
- auto FuncSummary = llvm::make_unique<FunctionSummary>(
+ auto FuncSummary = std::make_unique<FunctionSummary>(
Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
@@ -598,7 +598,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
!V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
!V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
- auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
+ auto GVarSummary = std::make_unique<GlobalVarSummary>(Flags, VarFlags,
RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
@@ -616,7 +616,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
/* Live = */ false, A.isDSOLocal(),
A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
- auto AS = llvm::make_unique<AliasSummary>(Flags);
+ auto AS = std::make_unique<AliasSummary>(Flags);
auto *Aliasee = A.getBaseObject();
auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
assert(AliaseeVI && "Alias expects aliasee summary to be available");
@@ -696,7 +696,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// Create the appropriate summary type.
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
- llvm::make_unique<FunctionSummary>(
+ std::make_unique<FunctionSummary>(
GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
@@ -714,7 +714,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
- llvm::make_unique<GlobalVarSummary>(
+ std::make_unique<GlobalVarSummary>(
GVFlags, GlobalVarSummary::GVarFlags(false, false),
ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
@@ -741,7 +741,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
else if (F.hasProfileData()) {
LoopInfo LI{DT};
BranchProbabilityInfo BPI{F, LI};
- BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
+ BFIPtr = std::make_unique<BlockFrequencyInfo>(F, BPI, LI);
BFI = BFIPtr.get();
}
@@ -813,7 +813,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (!ModuleSummaryDotFile.empty()) {
std::error_code EC;
- raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None);
+ raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None);
if (EC)
report_fatal_error(Twine("Failed to open dot file ") +
ModuleSummaryDotFile + ": " + EC.message() + "\n");
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index b616cd6f762b..44527773115d 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MustExecute.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -19,8 +21,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+#define DEBUG_TYPE "must-execute"
+
const DenseMap<BasicBlock *, ColorVector> &
LoopSafetyInfo::getBlockColors() const {
return BlockColors;
@@ -306,6 +311,17 @@ namespace {
}
bool runOnFunction(Function &F) override;
};
+ struct MustBeExecutedContextPrinter : public ModulePass {
+ static char ID;
+
+ MustBeExecutedContextPrinter() : ModulePass(ID) {
+ initializeMustBeExecutedContextPrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnModule(Module &M) override;
+ };
}
char MustExecutePrinter::ID = 0;
@@ -320,6 +336,36 @@ FunctionPass *llvm::createMustExecutePrinter() {
return new MustExecutePrinter();
}
+char MustBeExecutedContextPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ MustBeExecutedContextPrinter, "print-must-be-executed-contexts",
+    "print the must-be-executed-context for all instructions", false, true)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(MustBeExecutedContextPrinter,
+ "print-must-be-executed-contexts",
+                    "print the must-be-executed-context for all instructions",
+ false, true)
+
+ModulePass *llvm::createMustBeExecutedContextPrinter() {
+ return new MustBeExecutedContextPrinter();
+}
+
+bool MustBeExecutedContextPrinter::runOnModule(Module &M) {
+ MustBeExecutedContextExplorer Explorer(true);
+ for (Function &F : M) {
+ for (Instruction &I : instructions(F)) {
+ dbgs() << "-- Explore context of: " << I << "\n";
+ for (const Instruction *CI : Explorer.range(&I))
+ dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI
+ << "\n";
+ }
+ }
+
+ return false;
+}
+
static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
// TODO: merge these two routines. For the moment, we display the best
// result obtained by *either* implementation. This is a bit unfair since no
@@ -396,3 +442,75 @@ bool MustExecutePrinter::runOnFunction(Function &F) {
return false;
}
+
+const Instruction *
+MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction(
+ MustBeExecutedIterator &It, const Instruction *PP) {
+ if (!PP)
+ return PP;
+ LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n");
+
+ // If we explore only inside a given basic block we stop at terminators.
+ if (!ExploreInterBlock && PP->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n");
+ return nullptr;
+ }
+
+  // If we do not traverse the call graph, we check whether we can make
+  // progress in the current function. First, check if the instruction is
+  // guaranteed to transfer execution to the successor.
+ bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP);
+ if (!TransfersExecution)
+ return nullptr;
+
+ // If this is not a terminator we know that there is a single instruction
+  // after this one that is executed next if control is transferred. If not,
+  // we can try to go back to a call site we entered earlier. If none exists,
+  // we do not know of any instruction that has to be executed next.
+ if (!PP->isTerminator()) {
+ const Instruction *NextPP = PP->getNextNode();
+ LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n");
+ return NextPP;
+ }
+
+ // Finally, we have to handle terminators, trivial ones first.
+ assert(PP->isTerminator() && "Expected a terminator!");
+
+ // A terminator without a successor is not handled yet.
+ if (PP->getNumSuccessors() == 0) {
+ LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n");
+ return nullptr;
+ }
+
+  // For a terminator with a single successor, we continue at the beginning
+  // of that one.
+ if (PP->getNumSuccessors() == 1) {
+ LLVM_DEBUG(
+ dbgs() << "\tUnconditional terminator, continue with successor\n");
+ return &PP->getSuccessor(0)->front();
+ }
+
+ LLVM_DEBUG(dbgs() << "\tNo join point found\n");
+ return nullptr;
+}
+
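
Putting the pieces together: the explorer repeatedly asks for the next instruction that must execute, and the iterator below deduplicates through its Visited set. A hedged usage sketch mirroring the printer pass above (the function name is illustrative):

// Sketch: enumerate the must-be-executed context of I. The range yields I
// followed by every instruction guaranteed to execute whenever I executes.
static void dumpContext(MustBeExecutedContextExplorer &Explorer,
                        const Instruction &I) {
  for (const Instruction *CI : Explorer.range(&I))
    errs() << *CI << "\n";
}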
+MustBeExecutedIterator::MustBeExecutedIterator(
+ MustBeExecutedContextExplorer &Explorer, const Instruction *I)
+ : Explorer(Explorer), CurInst(I) {
+ reset(I);
+}
+
+void MustBeExecutedIterator::reset(const Instruction *I) {
+ CurInst = I;
+ Visited.clear();
+ Visited.insert(I);
+}
+
+const Instruction *MustBeExecutedIterator::advance() {
+ assert(CurInst && "Cannot advance an end iterator!");
+ const Instruction *Next =
+ Explorer.getMustBeExecutedNextInstruction(*this, CurInst);
+ if (Next && !Visited.insert(Next).second)
+ Next = nullptr;
+ return Next;
+}
diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp
index 72c40a0be232..07a5619a35b9 100644
--- a/lib/Analysis/OptimizationRemarkEmitter.cpp
+++ b/lib/Analysis/OptimizationRemarkEmitter.cpp
@@ -39,7 +39,7 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
BPI.calculate(*F, LI);
// Finally compute BFI.
- OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+ OwnedBFI = std::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
BFI = OwnedBFI.get();
}
@@ -97,7 +97,7 @@ bool OptimizationRemarkEmitterWrapperPass::runOnFunction(Function &Fn) {
else
BFI = nullptr;
- ORE = llvm::make_unique<OptimizationRemarkEmitter>(&Fn, BFI);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn, BFI);
return false;
}
diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp
index 458c0a7de6c2..e947e5e388a8 100644
--- a/lib/Analysis/OrderedInstructions.cpp
+++ b/lib/Analysis/OrderedInstructions.cpp
@@ -21,7 +21,7 @@ bool OrderedInstructions::localDominates(const Instruction *InstA,
const BasicBlock *IBB = InstA->getParent();
auto OBB = OBBMap.find(IBB);
if (OBB == OBBMap.end())
- OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+ OBB = OBBMap.insert({IBB, std::make_unique<OrderedBasicBlock>(IBB)}).first;
return OBB->second->dominates(InstA, InstB);
}
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index dce19d6d546e..b99b75715025 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -45,6 +45,13 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
+static cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
+ "profile-summary-large-working-set-size-threshold", cl::Hidden,
+ cl::init(12500), cl::ZeroOrMore,
+ cl::desc("The code working set size is considered large if the number of"
+ " blocks required to reach the -profile-summary-cutoff-hot"
+ " percentile exceeds this count."));
+
// The next two options override the counts derived from summary computation and
// are useful for debugging purposes.
static cl::opt<int> ProfileSummaryHotCount(
@@ -186,6 +193,31 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
return true;
}
+// Like isFunctionHotInCallGraph but for a given cutoff.
+bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile(
+ int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) {
+ if (!F || !computeSummary())
+ return false;
+ if (auto FunctionCount = F->getEntryCount())
+ if (isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
+ return true;
+
+ if (hasSampleProfile()) {
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ if (isHotCountNthPercentile(PercentileCutoff, TotalCallCount))
+ return true;
+ }
+ for (const auto &BB : *F)
+ if (isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI))
+ return true;
+ return false;
+}
+
/// Returns true if the function's entry is cold. If it returns false, it
/// either means it is not cold or it is unknown whether it is cold or not (for
/// example, no profile data is available).
@@ -222,6 +254,23 @@ void ProfileSummaryInfo::computeThresholds() {
"Cold count threshold cannot exceed hot count threshold!");
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
+ HasLargeWorkingSetSize =
+ HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold;
+}
+
+Optional<uint64_t> ProfileSummaryInfo::computeThreshold(int PercentileCutoff) {
+ if (!computeSummary())
+ return None;
+  auto Iter = ThresholdCache.find(PercentileCutoff);
+  if (Iter != ThresholdCache.end())
+    return Iter->second;
+ auto &DetailedSummary = Summary->getDetailedSummary();
+  auto &Entry = getEntryForPercentile(DetailedSummary, PercentileCutoff);
+ uint64_t CountThreshold = Entry.MinCount;
+ ThresholdCache[PercentileCutoff] = CountThreshold;
+ return CountThreshold;
}
bool ProfileSummaryInfo::hasHugeWorkingSetSize() {
@@ -230,6 +279,12 @@ bool ProfileSummaryInfo::hasHugeWorkingSetSize() {
return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue();
}
+bool ProfileSummaryInfo::hasLargeWorkingSetSize() {
+ if (!HasLargeWorkingSetSize)
+ computeThresholds();
+ return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue();
+}
+
bool ProfileSummaryInfo::isHotCount(uint64_t C) {
if (!HotCountThreshold)
computeThresholds();
@@ -242,6 +297,11 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
return ColdCountThreshold && C <= ColdCountThreshold.getValue();
}
+bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, uint64_t C) {
+ auto CountThreshold = computeThreshold(PercentileCutoff);
+ return CountThreshold && C >= CountThreshold.getValue();
+}
+
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
if (!HotCountThreshold)
computeThresholds();
@@ -265,6 +325,13 @@ bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
return Count && isColdCount(*Count);
}
+bool ProfileSummaryInfo::isHotBlockNthPercentile(int PercentileCutoff,
+ const BasicBlock *BB,
+ BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(BB);
+ return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
+}
+
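
A short usage sketch for the new percentile-based queries; the 990000 cutoff (the 99th percentile on ProfileSummary's 0-999999 scale) is an illustrative choice, not a value mandated by this patch:

// Sketch: hotness at a caller-chosen percentile instead of the fixed
// -profile-summary-cutoff-hot threshold.
static bool isHotAt99(ProfileSummaryInfo &PSI, const Function &F,
                      BlockFrequencyInfo &BFI) {
  return PSI.isFunctionHotInCallGraphNthPercentile(990000, &F, BFI);
}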
bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
BlockFrequencyInfo *BFI) {
auto C = getProfileCount(CS.getInstruction(), BFI);
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index bc2cfd6fcc42..5ce0a1adeaa0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -148,6 +148,7 @@ STATISTIC(NumBruteForceTripCountsComputed,
static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
+ cl::ZeroOrMore,
cl::desc("Maximum number of iterations SCEV will "
"symbolically execute a constant "
"derived loop"),
@@ -157,6 +158,9 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
static cl::opt<bool> VerifySCEV(
"verify-scev", cl::Hidden,
cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+static cl::opt<bool> VerifySCEVStrict(
+ "verify-scev-strict", cl::Hidden,
+    cl::desc("Enable stricter verification when -verify-scev is passed"));
static cl::opt<bool>
VerifySCEVMap("verify-scev-maps", cl::Hidden,
cl::desc("Verify no dangling value in ScalarEvolution's "
@@ -1707,7 +1711,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
@@ -2051,7 +2055,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
// in infinite recursion. In the latter case, the analysis code will
// cope with a conservative value, and it will take care to purge
// that value once it has finished.
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
// Manually compute the final value for AR, checking for
// overflow.
@@ -3421,7 +3425,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X
}
- // It's tempting to want to call getMaxBackedgeTakenCount count here and
+  // It's tempting to want to call getConstantMaxBackedgeTakenCount here and
// use that information to infer NUW and NSW flags. However, computing a
// BE count requires calling getAddRecExpr, so we may not yet have a
// meaningful BE count at this point (and if we don't, we'd be stuck
@@ -4991,7 +4995,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
// overflow.
if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
- (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags);
+ (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
return PHISCEV;
}
@@ -5596,6 +5600,22 @@ ScalarEvolution::getRangeRef(const SCEV *S,
ConservativeResult.intersectWith(X, RangeType));
}
+ if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) {
+ ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint);
+ for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i)
+ X = X.smin(getRangeRef(SMin->getOperand(i), SignHint));
+ return setRange(SMin, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
+ }
+
+ if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) {
+ ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint);
+ for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i)
+ X = X.umin(getRangeRef(UMin->getOperand(i), SignHint));
+ return setRange(UMin, SignHint,
+ ConservativeResult.intersectWith(X, RangeType));
+ }
+
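
The SMin/UMin cases fold operand ranges pointwise. A small worked example of what ConstantRange::smin produces (the i8 values are chosen purely for illustration):

// Sketch: smin of [0, 10) and [-5, 3) is [-5, 3), since the minimum ranges
// over [smin(0, -5), smin(9, 2)] = [-5, 2].
ConstantRange X(APInt(8, 0), APInt(8, 10));                    // [0, 10)
ConstantRange Y(APInt(8, -5, /*isSigned=*/true), APInt(8, 3)); // [-5, 3)
ConstantRange Z = X.smin(Y);                                   // [-5, 3)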
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint);
ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint);
@@ -5654,7 +5674,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+ const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
auto RangeFromAffine = getRangeForAffineAR(
@@ -6523,7 +6543,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L,
unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
- dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
+ dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
}
@@ -6599,7 +6619,7 @@ const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
/// Similar to getBackedgeTakenCount, except return the least SCEV value that is
/// known never to be less than the actual backedge taken count.
-const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+const SCEV *ScalarEvolution::getConstantMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getMax(this);
}
@@ -9833,6 +9853,10 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
+ // X - X = 0.
+ if (More == Less)
+ return APInt(getTypeSizeInBits(More->getType()), 0);
+
if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
const auto *LAR = cast<SCEVAddRecExpr>(Less);
const auto *MAR = cast<SCEVAddRecExpr>(More);
@@ -10314,10 +10338,43 @@ bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
return false;
}
+static bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ // zext x u<= sext x, sext x s<= zext x
+ switch (Pred) {
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_SLE: {
+ // If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt.
+ const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS);
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS);
+ if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
+ return true;
+ break;
+ }
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
+ case ICmpInst::ICMP_ULE: {
+ // If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt.
+ const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS);
+ const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS);
+ if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand())
+ return true;
+ break;
+ }
+ default:
+ break;
+  }
+ return false;
+}
+
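
A concrete instance of the extend idiom, worked through for an i8 value; the APInt names are illustrative:

// Sketch: for x = i8 -1 (0xFF), zext widens with zeros, sext with ones.
APInt X(8, -1, /*isSigned=*/true);
APInt Z = X.zext(16); // 0x00FF: 255 unsigned
APInt S = X.sext(16); // 0xFFFF: 65535 unsigned, -1 signed
assert(Z.ule(S));     // zext x u<= sext x holds for every x
assert(S.sle(Z));     // sext x s<= zext x holds for every x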
bool
ScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
- return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
+ return isKnownPredicateExtendIdiom(Pred, LHS, RHS) ||
+ isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
@@ -11434,8 +11491,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
- if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
- OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) {
+ OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L);
if (SE->isBackedgeTakenCountMaxOrZero(L))
OS << ", actual taken count either this or zero.";
} else {
@@ -11901,14 +11958,14 @@ void ScalarEvolution::verify() const {
SE.getTypeSizeInBits(NewBECount->getType()))
CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
- auto *ConstantDelta =
- dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount));
+ const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);
- if (ConstantDelta && ConstantDelta->getAPInt() != 0) {
- dbgs() << "Trip Count Changed!\n";
+ // Unless VerifySCEVStrict is set, we only compare constant deltas.
+ if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
+ dbgs() << "Trip Count for " << *L << " Changed!\n";
dbgs() << "Old: " << *CurBECount << "\n";
dbgs() << "New: " << *NewBECount << "\n";
- dbgs() << "Delta: " << *ConstantDelta << "\n";
+ dbgs() << "Delta: " << *Delta << "\n";
std::abort();
}
}
@@ -11959,7 +12016,7 @@ ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
SE.reset(new ScalarEvolution(
- F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e8a95d35482c..bceec921188e 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -240,9 +240,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
-/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
-/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
-/// check to see if the divide was folded.
static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
const SCEV *Factor, ScalarEvolution &SE,
const DataLayout &DL) {
@@ -1486,7 +1483,18 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
}
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
- if (!CanonicalMode) return expandAddRecExprLiterally(S);
+ // In canonical mode we compute the addrec as an expression of a canonical IV
+ // using evaluateAtIteration and expand the resulting SCEV expression. This
+  // way we avoid introducing new IVs to carry on the computation of the addrec
+ // throughout the loop.
+ //
+ // For nested addrecs evaluateAtIteration might need a canonical IV of a
+ // type wider than the addrec itself. Emitting a canonical IV of the
+ // proper type might produce non-legal types, for example expanding an i64
+ // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
+ // back to non-canonical mode for nested addrecs.
+ if (!CanonicalMode || (S->getNumOperands() > 2))
+ return expandAddRecExprLiterally(S);
Type *Ty = SE.getEffectiveSCEVType(S->getType());
const Loop *L = S->getLoop();
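
To make the i65 example concrete: evaluateAtIteration expands an addrec through binomial coefficients, so the quadratic addrec from the comment evaluates at iteration n to

  f(n) = 0*C(n,0) + 2*C(n,1) + 1*C(n,2) = 2n + n(n-1)/2

and, roughly, computing that last term exactly without the intermediate arithmetic wrapping is what forces a canonical IV one bit wider than the addrec's own i64 type.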
@@ -2094,11 +2102,10 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
for (BasicBlock *BB : ExitingBlocks) {
ICmpInst::Predicate Pred;
Instruction *LHS, *RHS;
- BasicBlock *TrueBB, *FalseBB;
if (!match(BB->getTerminator(),
m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
- TrueBB, FalseBB)))
+ m_BasicBlock(), m_BasicBlock())))
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp
index 4cf235db86eb..1b3638698950 100644
--- a/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/lib/Analysis/StackSafetyAnalysis.cpp
@@ -333,8 +333,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) {
// FIXME: consult devirt?
// Do not follow aliases, otherwise we could inadvertently follow
// dso_preemptable aliases or aliases with interposable linkage.
- const GlobalValue *Callee = dyn_cast<GlobalValue>(
- CS.getCalledValue()->stripPointerCastsNoFollowAliases());
+ const GlobalValue *Callee =
+ dyn_cast<GlobalValue>(CS.getCalledValue()->stripPointerCasts());
if (!Callee) {
US.updateRange(UnknownRange);
return false;
diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp
index 3cf248a31142..8447dc87069d 100644
--- a/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -218,9 +218,11 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
assert(JoinBlocks);
+  LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n");
+
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@@ -228,13 +230,19 @@ struct DivergencePropagator {
if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
// immediate loop exit from node.
ReachedLoopExits.insert(SuccBlock);
- continue;
} else {
// regular successor
PendingUpdates.insert(SuccBlock);
}
}
+ LLVM_DEBUG(
+ dbgs() << "SDA: rpo order:\n";
+ for (const auto * RpoBlock : FuncRPOT) {
+ dbgs() << "- " << RpoBlock->getName() << "\n";
+ }
+ );
+
auto ItBeginRPO = FuncRPOT.begin();
// skip until term (TODO RPOT won't let us start at @term directly)
@@ -245,16 +253,18 @@ struct DivergencePropagator {
// propagate definitions at the immediate successors of the node in RPO
auto ItBlockRPO = ItBeginRPO;
- while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
+ while ((++ItBlockRPO != ItEndRPO) &&
+ !PendingUpdates.empty()) {
const auto *Block = *ItBlockRPO;
+ LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
- // skip @block if not pending update
+ // skip Block if not pending update
auto ItPending = PendingUpdates.find(Block);
if (ItPending == PendingUpdates.end())
continue;
PendingUpdates.erase(ItPending);
- // propagate definition at @block to its successors
+ // propagate definition at Block to its successors
auto ItDef = DefMap.find(Block);
const auto *DefBlock = ItDef->second;
assert(DefBlock);
@@ -278,6 +288,8 @@ struct DivergencePropagator {
}
}
+ LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
+
// We need to know the definition at the parent loop header to decide
// whether the definition at the header is different from the definition at
// the loop exits, which would indicate divergent loop exits.
@@ -292,24 +304,17 @@ struct DivergencePropagator {
// |
// proper exit from both loops
//
- // D post-dominates B as it is the only proper exit from the "A loop".
- // If C has a divergent branch, propagation will therefore stop at D.
- // That implies that B will never receive a definition.
- // But that definition can only be the same as at D (D itself in thise case)
- // because all paths to anywhere have to pass through D.
- //
- const BasicBlock *ParentLoopHeader =
- ParentLoop ? ParentLoop->getHeader() : nullptr;
- if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
- DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
- }
-
// analyze reached loop exits
if (!ReachedLoopExits.empty()) {
+ const BasicBlock *ParentLoopHeader =
+ ParentLoop ? ParentLoop->getHeader() : nullptr;
+
assert(ParentLoop);
- const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
+ auto ItHeaderDef = DefMap.find(ParentLoopHeader);
+ const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second;
+
LLVM_DEBUG(printDefs(dbgs()));
- assert(HeaderDefBlock && "no definition in header of carrying loop");
+ assert(HeaderDefBlock && "no definition at header of carrying loop");
for (const auto *ExitBlock : ReachedLoopExits) {
auto ItExitDef = DefMap.find(ExitBlock);
@@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
return *ItCached->second;
}
- // dont propagte beyond the immediate post dom of the loop
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
- while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
- IpdNode = IpdNode->getIDom();
- PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
- }
-
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop());
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
- // dont propagate beyond the immediate post dominator of the branch
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index ef139d3257d2..230969698054 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -28,7 +28,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library")));
-StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
+ {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
};
@@ -58,14 +59,14 @@ static bool hasBcmp(const Triple &TT) {
return TT.isGNUEnvironment() || TT.isMusl();
// Both NetBSD and OpenBSD are planning to remove the function. Windows does
// not have it.
- return TT.isOSFreeBSD() || TT.isOSSolaris() || TT.isOSDarwin();
+ return TT.isOSFreeBSD() || TT.isOSSolaris();
}
/// Initialize the set of available library functions based on the specified
/// target triple. This should be carefully written so that a missing target
/// triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- ArrayRef<StringRef> StandardNames) {
+ ArrayRef<StringLiteral> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
[](StringRef LHS, StringRef RHS) {
@@ -104,19 +105,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setShouldSignExtI32Param(ShouldSignExtI32Param);
if (T.getArch() == Triple::r600 ||
- T.getArch() == Triple::amdgcn) {
- TLI.setUnavailable(LibFunc_ldexp);
- TLI.setUnavailable(LibFunc_ldexpf);
- TLI.setUnavailable(LibFunc_ldexpl);
- TLI.setUnavailable(LibFunc_exp10);
- TLI.setUnavailable(LibFunc_exp10f);
- TLI.setUnavailable(LibFunc_exp10l);
- TLI.setUnavailable(LibFunc_log10);
- TLI.setUnavailable(LibFunc_log10f);
- TLI.setUnavailable(LibFunc_log10l);
- }
+ T.getArch() == Triple::amdgcn)
+ TLI.disableAllFunctions();
- // There are no library implementations of mempcy and memset for AMD gpus and
+ // There are no library implementations of memcpy and memset for AMD gpus and
// these can be difficult to lower in the backend.
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
@@ -623,19 +615,14 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
return GlobalValue::dropLLVMManglingEscape(funcName);
}
-bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
- LibFunc &F) const {
- StringRef const *Start = &StandardNames[0];
- StringRef const *End = &StandardNames[NumLibFuncs];
-
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const {
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
return false;
- StringRef const *I = std::lower_bound(
- Start, End, funcName, [](StringRef LHS, StringRef RHS) {
- return LHS < RHS;
- });
+ const auto *Start = std::begin(StandardNames);
+ const auto *End = std::end(StandardNames);
+ const auto *I = std::lower_bound(Start, End, funcName);
if (I != End && *I == funcName) {
F = (LibFunc)(I - Start);
return true;
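
The rewritten lookup relies on StandardNames being sorted (checked by the assert in initialize above); the index of the match doubles as the LibFunc enum value. A self-contained sketch of the same pattern over an illustrative table:

#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <iterator>

// Sketch: binary-search a sorted name table; the element index is the ID.
static const llvm::StringRef Names[] = {"cos", "exp", "memcpy", "sin"};

static int indexOf(llvm::StringRef Name) {
  const auto *B = std::begin(Names), *E = std::end(Names);
  const auto *I = std::lower_bound(B, E, Name);
  return (I != E && *I == Name) ? int(I - B) : -1;
}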
@@ -1481,6 +1468,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return false;
}
case LibFunc::NumLibFuncs:
+ case LibFunc::NotLibFunc:
break;
}
@@ -1599,14 +1587,6 @@ StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
return I->ScalarFnName;
}
-TargetLibraryInfo TargetLibraryAnalysis::run(Module &M,
- ModuleAnalysisManager &) {
- if (PresetInfoImpl)
- return TargetLibraryInfo(*PresetInfoImpl);
-
- return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple())));
-}
-
TargetLibraryInfo TargetLibraryAnalysis::run(Function &F,
FunctionAnalysisManager &) {
if (PresetInfoImpl)
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index eb04c34453fb..c9c294873ea6 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -9,6 +9,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -59,11 +60,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
SmallVector<BasicBlock *, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
- IE = ExitingBlocks.end();
- I != IE; ++I) {
- BasicBlock *BB = *I;
-
+ for (BasicBlock *BB : ExitingBlocks) {
// If we pass the updated counter back through a phi, we need to know
// which latch the updated value will be coming from.
if (!L->isLoopLatch(BB)) {
@@ -97,13 +94,11 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// For this to be true, we must dominate all blocks with backedges. Such
// blocks are in-loop predecessors to the header block.
bool NotAlways = false;
- for (pred_iterator PI = pred_begin(L->getHeader()),
- PIE = pred_end(L->getHeader());
- PI != PIE; ++PI) {
- if (!L->contains(*PI))
+ for (BasicBlock *Pred : predecessors(L->getHeader())) {
+ if (!L->contains(Pred))
continue;
- if (!DT.dominates(*I, *PI)) {
+ if (!DT.dominates(BB, Pred)) {
NotAlways = true;
break;
}
@@ -127,7 +122,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
- ExitBlock = *I;
+ ExitBlock = BB;
ExitCount = EC;
break;
}
@@ -227,6 +222,16 @@ unsigned TargetTransformInfo::getFlatAddressSpace() const {
return TTIImpl->getFlatAddressSpace();
}
+bool TargetTransformInfo::collectFlatAddressOperands(
+ SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
+ return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
+}
+
+bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const {
+ return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
@@ -283,21 +288,22 @@ bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
return TTIImpl->shouldFavorBackedgeIndex(L);
}
-bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
- return TTIImpl->isLegalMaskedStore(DataType);
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
+ MaybeAlign Alignment) const {
+ return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}
-bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
- return TTIImpl->isLegalMaskedLoad(DataType);
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+ MaybeAlign Alignment) const {
+ return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}
bool TargetTransformInfo::isLegalNTStore(Type *DataType,
- unsigned Alignment) const {
+ Align Alignment) const {
return TTIImpl->isLegalNTStore(DataType, Alignment);
}
-bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
- unsigned Alignment) const {
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
return TTIImpl->isLegalNTLoad(DataType, Alignment);
}
@@ -359,14 +365,6 @@ bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
return TTIImpl->isTypeLegal(Ty);
}
-unsigned TargetTransformInfo::getJumpBufAlignment() const {
- return TTIImpl->getJumpBufAlignment();
-}
-
-unsigned TargetTransformInfo::getJumpBufSize() const {
- return TTIImpl->getJumpBufSize();
-}
-
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
@@ -470,8 +468,16 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return Cost;
}
-unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
- return TTIImpl->getNumberOfRegisters(Vector);
+unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
+ return TTIImpl->getNumberOfRegisters(ClassID);
+}
+
+unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const {
+ return TTIImpl->getRegisterClassForType(Vector, Ty);
+}
+
+const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
+ return TTIImpl->getRegisterClassName(ClassID);
}
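
getNumberOfRegisters now takes a register class ID rather than a bool, with getRegisterClassForType mapping a type to its class. A hedged sketch of how a client would chain the two hooks (the function name is illustrative):

// Sketch: how many registers back the class that would hold Ty?
static unsigned numRegsForType(const TargetTransformInfo &TTI, Type *Ty,
                               bool Vector) {
  unsigned ClassID = TTI.getRegisterClassForType(Vector, Ty);
  return TTI.getNumberOfRegisters(ClassID);
}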
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
@@ -1276,6 +1282,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return getVectorInstrCost(I->getOpcode(),
IE->getType(), Idx);
}
+ case Instruction::ExtractValue:
+ return 0; // Model all ExtractValue nodes as free.
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
Type *Ty = Shuffle->getType();
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index 9311dfbc6eba..072d291f3f93 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -127,3 +127,35 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr,
Offset->getZExtValue(), CI, DT);
}
+
+Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
+ if (I->getType()->isPointerTy()) {
+ if (Offset == 0)
+ return I;
+ return nullptr;
+ }
+
+ const DataLayout &DL = M.getDataLayout();
+
+ if (auto *C = dyn_cast<ConstantStruct>(I)) {
+ const StructLayout *SL = DL.getStructLayout(C->getType());
+ if (Offset >= SL->getSizeInBytes())
+ return nullptr;
+
+ unsigned Op = SL->getElementContainingOffset(Offset);
+ return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+ Offset - SL->getElementOffset(Op), M);
+ }
+ if (auto *C = dyn_cast<ConstantArray>(I)) {
+ ArrayType *VTableTy = C->getType();
+ uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
+
+ unsigned Op = Offset / ElemSize;
+ if (Op >= C->getNumOperands())
+ return nullptr;
+
+ return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+ Offset % ElemSize, M);
+ }
+ return nullptr;
+}
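
A typical consumer of the new helper resolves the constant pointer stored in a vtable slot. A hedged sketch; GV and ByteOffset are assumed inputs, and the function name is illustrative:

// Sketch: fetch the function pointer a global's initializer stores at a
// given byte offset (e.g. a vtable slot); nullptr if none can be found.
static Function *functionAtOffset(GlobalVariable &GV, uint64_t ByteOffset,
                                  Module &M) {
  Constant *Ptr = getPointerAtOffset(GV.getInitializer(), ByteOffset, M);
  return Ptr ? dyn_cast<Function>(Ptr->stripPointerCasts()) : nullptr;
}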
diff --git a/lib/Analysis/VFABIDemangling.cpp b/lib/Analysis/VFABIDemangling.cpp
new file mode 100644
index 000000000000..6fd8ae63f5f0
--- /dev/null
+++ b/lib/Analysis/VFABIDemangling.cpp
@@ -0,0 +1,418 @@
+//===- VFABIDemangling.cpp - Vector Function ABI demangling utilities. ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/VectorUtils.h"
+
+using namespace llvm;
+
+namespace {
+/// Utilities for the Vector Function ABI name parser.
+
+/// Return types for the parser functions.
+enum class ParseRet {
+ OK, // Found.
+ None, // Not found.
+ Error // Syntax error.
+};
+
+/// Extracts the `<isa>` information from the mangled string, and
+/// sets the `ISA` accordingly.
+ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
+ if (MangledName.empty())
+ return ParseRet::Error;
+
+ ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
+ .Case("n", VFISAKind::AdvancedSIMD)
+ .Case("s", VFISAKind::SVE)
+ .Case("b", VFISAKind::SSE)
+ .Case("c", VFISAKind::AVX)
+ .Case("d", VFISAKind::AVX2)
+ .Case("e", VFISAKind::AVX512)
+ .Default(VFISAKind::Unknown);
+
+ MangledName = MangledName.drop_front(1);
+
+ return ParseRet::OK;
+}
+
+/// Extracts the `<mask>` information from the mangled string, and
+/// sets `IsMasked` accordingly. On success, the parsed token is
+/// removed from the input string `MangledName`.
+ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) {
+ if (MangledName.consume_front("M")) {
+ IsMasked = true;
+ return ParseRet::OK;
+ }
+
+ if (MangledName.consume_front("N")) {
+ IsMasked = false;
+ return ParseRet::OK;
+ }
+
+ return ParseRet::Error;
+}
+
+/// Extracts the `<vlen>` information from the mangled string, and
+/// sets `VF` accordingly. A `<vlen> == "x"` token is interpreted as a scalable
+/// vector length. On success, the `<vlen>` token is removed from
+/// the input string `ParseString`.
+///
+ParseRet tryParseVLEN(StringRef &ParseString, unsigned &VF, bool &IsScalable) {
+ if (ParseString.consume_front("x")) {
+ VF = 0;
+ IsScalable = true;
+ return ParseRet::OK;
+ }
+
+ if (ParseString.consumeInteger(10, VF))
+ return ParseRet::Error;
+
+ IsScalable = false;
+ return ParseRet::OK;
+}
+
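
For orientation, here is how the three parsers above consume the front of a Vector Function ABI mangled name; the sample string is an illustrative fragment of the <isa><mask><vlen> prefix, not a complete name:

// Sketch: parsing the prefix of "nN2v" step by step.
StringRef S = "nN2v";
VFISAKind ISA; bool Masked; unsigned VF; bool Scalable;
tryParseISA(S, ISA);           // ISA == AdvancedSIMD, S == "N2v"
tryParseMask(S, Masked);       // Masked == false,     S == "2v"
tryParseVLEN(S, VF, Scalable); // VF == 2, !Scalable,  S == "v"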
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// <token> <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `Pos` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+///
+/// The function expects <token> to be one of "ls", "Rs", "Us" or
+/// "Ls".
+ParseRet tryParseLinearTokenWithRuntimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &Pos,
+ const StringRef Token) {
+ if (ParseString.consume_front(Token)) {
+ PKind = VFABI::getVFParamKindFromString(Token);
+ if (ParseString.consumeInteger(10, Pos))
+ return ParseRet::Error;
+ return ParseRet::OK;
+ }
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following string at the beginning of
+/// the input string `ParseString`:
+///
+/// <token> <number>
+///
+/// <token> is one of "ls", "Rs", "Us" or "Ls".
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `StepOrPos` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseLinearWithRuntimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &StepOrPos) {
+ ParseRet Ret;
+
+ // "ls" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "ls");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ // "Rs" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Rs");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ // "Ls" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Ls");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ // "Us" <RuntimeStepPos>
+ Ret = tryParseLinearTokenWithRuntimeStep(ParseString, PKind, StepOrPos, "Us");
+ if (Ret != ParseRet::None)
+ return Ret;
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// <token> {"n"} <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `LinearStep` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+///
+/// The function expects <token> to be one of "l", "R", "U" or
+/// "L".
+ParseRet tryParseCompileTimeLinearToken(StringRef &ParseString,
+ VFParamKind &PKind, int &LinearStep,
+ const StringRef Token) {
+ if (ParseString.consume_front(Token)) {
+ PKind = VFABI::getVFParamKindFromString(Token);
+ const bool Negate = ParseString.consume_front("n");
+ if (ParseString.consumeInteger(10, LinearStep))
+ LinearStep = 1;
+ if (Negate)
+ LinearStep *= -1;
+ return ParseRet::OK;
+ }
+
+ return ParseRet::None;
+}
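+
+// For example (editorial note): on input "ln4" with Token == "l", the
+// parser consumes "l", sees the negation marker "n", reads 4, and yields
+// PKind == VFParamKind::OMP_Linear with LinearStep == -4. When no digits
+// follow the token, the step defaults to 1 (or -1 after "n").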
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// ["l" | "R" | "U" | "L"] {"n"} <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `LinearStep` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseLinearWithCompileTimeStep(StringRef &ParseString,
+ VFParamKind &PKind, int &StepOrPos) {
+ // "l" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "l") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ // "R" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "R") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ // "L" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "L") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ // "U" {"n"} <CompileTimeStep>
+ if (tryParseCompileTimeLinearToken(ParseString, PKind, StepOrPos, "U") ==
+ ParseRet::OK)
+ return ParseRet::OK;
+
+ return ParseRet::None;
+}
+
+/// The function looks for the following strings at the beginning of
+/// the input string `ParseString`:
+///
+/// "u" <number>
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `Pos` to
+/// <number>, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseUniform(StringRef &ParseString, VFParamKind &PKind, int &Pos) {
+ // "u" <Pos>
+ const char *UniformToken = "u";
+ if (ParseString.consume_front(UniformToken)) {
+ PKind = VFABI::getVFParamKindFromString(UniformToken);
+ if (ParseString.consumeInteger(10, Pos))
+ return ParseRet::Error;
+
+ return ParseRet::OK;
+ }
+ return ParseRet::None;
+}
+
+/// Looks into the <parameters> part of the mangled name in search
+/// of valid parameters at the beginning of the string
+/// `ParseString`.
+///
+/// On success, it removes the parsed parameter from `ParseString`,
+/// sets `PKind` to the corresponding enum value, sets `StepOrPos`
+/// accordingly, and returns success. On a syntax error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseParameter(StringRef &ParseString, VFParamKind &PKind,
+ int &StepOrPos) {
+ if (ParseString.consume_front("v")) {
+ PKind = VFParamKind::Vector;
+ StepOrPos = 0;
+ return ParseRet::OK;
+ }
+
+ const ParseRet HasLinearRuntime =
+ tryParseLinearWithRuntimeStep(ParseString, PKind, StepOrPos);
+ if (HasLinearRuntime != ParseRet::None)
+ return HasLinearRuntime;
+
+ const ParseRet HasLinearCompileTime =
+ tryParseLinearWithCompileTimeStep(ParseString, PKind, StepOrPos);
+ if (HasLinearCompileTime != ParseRet::None)
+ return HasLinearCompileTime;
+
+ const ParseRet HasUniform = tryParseUniform(ParseString, PKind, StepOrPos);
+ if (HasUniform != ParseRet::None)
+ return HasUniform;
+
+ return ParseRet::None;
+}
+
+/// Looks into the <parameters> part of the mangled name in search
+/// of a valid 'aligned' clause. The function should be invoked
+/// after parsing a parameter via `tryParseParameter`.
+///
+/// On success, it removes the parsed clause from `ParseString` and
+/// sets `Alignment` to the parsed power-of-two value. On a syntax
+/// error, it returns a
+/// parsing error. If nothing is parsed, it returns None.
+ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
+ uint64_t Val;
+ // "a" <number>
+ if (ParseString.consume_front("a")) {
+ if (ParseString.consumeInteger(10, Val))
+ return ParseRet::Error;
+
+ if (!isPowerOf2_64(Val))
+ return ParseRet::Error;
+
+ Alignment = Align(Val);
+
+ return ParseRet::OK;
+ }
+
+ return ParseRet::None;
+}
+} // namespace
+
+// Format of the ABI name:
+// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
+Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName) {
+ // Assume there is no custom name <redirection>, and therefore the
+ // vector name consists of
+ // _ZGV<isa><mask><vlen><parameters>_<scalarname>.
+ StringRef VectorName = MangledName;
+
+ // Parse the fixed size part of the mangled name.
+ if (!MangledName.consume_front("_ZGV"))
+ return None;
+
+ // Extract ISA. An unknown ISA is also supported, so we accept all
+ // values.
+ VFISAKind ISA;
+ if (tryParseISA(MangledName, ISA) != ParseRet::OK)
+ return None;
+
+ // Extract <mask>.
+ bool IsMasked;
+ if (tryParseMask(MangledName, IsMasked) != ParseRet::OK)
+ return None;
+
+ // Parse the variable size, starting from <vlen>.
+ unsigned VF;
+ bool IsScalable;
+ if (tryParseVLEN(MangledName, VF, IsScalable) != ParseRet::OK)
+ return None;
+
+ // Parse the <parameters>.
+ ParseRet ParamFound;
+ SmallVector<VFParameter, 8> Parameters;
+ do {
+ const unsigned ParameterPos = Parameters.size();
+ VFParamKind PKind;
+ int StepOrPos;
+ ParamFound = tryParseParameter(MangledName, PKind, StepOrPos);
+
+ // Bail out if there is a parsing error for the parameter.
+ if (ParamFound == ParseRet::Error)
+ return None;
+
+ if (ParamFound == ParseRet::OK) {
+ Align Alignment;
+ // Look for the alignment token "a <number>".
+ const ParseRet AlignFound = tryParseAlign(MangledName, Alignment);
+ // Bail out if there is a syntax error in the align token.
+ if (AlignFound == ParseRet::Error)
+ return None;
+
+ // Add the parameter.
+ Parameters.push_back({ParameterPos, PKind, StepOrPos, Alignment});
+ }
+ } while (ParamFound == ParseRet::OK);
+
+ // A valid MangledName must have at least one valid entry in the
+ // <parameters>.
+ if (Parameters.empty())
+ return None;
+
+ // Check for the <scalarname> and the optional <redirection>, which
+ // are separated from the prefix with "_".
+ if (!MangledName.consume_front("_"))
+ return None;
+
+ // The rest of the string must be in the format:
+ // <scalarname>[(<redirection>)]
+ const StringRef ScalarName =
+ MangledName.take_while([](char In) { return In != '('; });
+
+ if (ScalarName.empty())
+ return None;
+
+ // Reduce MangledName to [(<redirection>)].
+ MangledName = MangledName.ltrim(ScalarName);
+ // Find the optional custom name redirection.
+ if (MangledName.consume_front("(")) {
+ if (!MangledName.consume_back(")"))
+ return None;
+ // Update the vector variant with the one specified by the user.
+ VectorName = MangledName;
+ // If the vector name is missing, bail out.
+ if (VectorName.empty())
+ return None;
+ }
+
+ // When <mask> is "M", we need to add a parameter that is used as
+ // the global predicate for the function.
+ if (IsMasked) {
+ const unsigned Pos = Parameters.size();
+ Parameters.push_back({Pos, VFParamKind::GlobalPredicate});
+ }
+
+ // Check the invariants for parameters of type
+ // `VFParamKind::GlobalPredicate`, as prescribed by the Vector Function
+ // ABI specifications supported by this parser:
+ // 1. Uniqueness.
+ // 2. Must be the last in the parameter list.
+ const auto NGlobalPreds = std::count_if(
+ Parameters.begin(), Parameters.end(), [](const VFParameter PK) {
+ return PK.ParamKind == VFParamKind::GlobalPredicate;
+ });
+ assert(NGlobalPreds < 2 && "Cannot have more than one global predicate.");
+ if (NGlobalPreds)
+ assert(Parameters.back().ParamKind == VFParamKind::GlobalPredicate &&
+ "The global predicate must be the last parameter");
+
+ const VFShape Shape({VF, IsScalable, ISA, Parameters});
+ return VFInfo({Shape, ScalarName, VectorName});
+}
+
+VFParamKind VFABI::getVFParamKindFromString(const StringRef Token) {
+ const VFParamKind ParamKind = StringSwitch<VFParamKind>(Token)
+ .Case("v", VFParamKind::Vector)
+ .Case("l", VFParamKind::OMP_Linear)
+ .Case("R", VFParamKind::OMP_LinearRef)
+ .Case("L", VFParamKind::OMP_LinearVal)
+ .Case("U", VFParamKind::OMP_LinearUVal)
+ .Case("ls", VFParamKind::OMP_LinearPos)
+ .Case("Ls", VFParamKind::OMP_LinearValPos)
+ .Case("Rs", VFParamKind::OMP_LinearRefPos)
+ .Case("Us", VFParamKind::OMP_LinearUValPos)
+ .Case("u", VFParamKind::OMP_Uniform)
+ .Default(VFParamKind::Unknown);
+
+ if (ParamKind != VFParamKind::Unknown)
+ return ParamKind;
+
+ // This function should never be invoked with an invalid input.
+ llvm_unreachable("This fuction should be invoken only on parameters"
+ " that have a textual representation in the mangled name"
+ " of the Vector Function ABI");
+}
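+
+// Editorial usage sketch, not part of this patch. The VFInfo/VFShape field
+// names are assumed from the declarations in llvm/Analysis/VectorUtils.h
+// that this parser is built against:
+//
+//   // "_ZGVnN2vl8_foo": ISA 'n' (AdvancedSIMD), 'N' (unmasked), VF = 2,
+//   // parameters "v" (vector) and "l8" (linear, compile-time step 8).
+//   Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2vl8_foo");
+//   assert(Info && Info->Shape.VF == 2 && !Info->Shape.IsScalable);
+//   assert(Info->Shape.Parameters[1].ParamKind == VFParamKind::OMP_Linear);
+//   assert(Info->ScalarName == "foo");
+//   // With no "(<redirection>)", VectorName is the full mangled string.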
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index c70906dcc629..bbf389991836 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -558,12 +558,18 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return true;
}
+ // Don't let an assume affect itself - this would cause the problems
+ // `isEphemeralValueOf` is trying to prevent, and it would also make
+ // the loop below go out of bounds.
+ if (Inv == CxtI)
+ return false;
+
// The context comes first, but they're both in the same block. Make sure
// there is nothing in between that might interrupt the control flow.
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
return false;
return !isEphemeralValueOf(Inv, CxtI);
@@ -1049,7 +1055,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Select: {
- const Value *LHS, *RHS;
+ const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
if (SelectPatternResult::isMinOrMax(SPF)) {
computeKnownBits(RHS, Known, Depth + 1, Q);
@@ -1095,7 +1101,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// RHS from matchSelectPattern returns the negation part of abs pattern.
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
- if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
+ if (match(RHS, m_Neg(m_Specific(LHS))) &&
+ Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
MaxHighZeros = 1;
}
@@ -1366,7 +1373,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
else if (LR == I)
L = LL;
else
- break;
+ continue; // Check for recurrence with L and R flipped.
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
computeKnownBits(R, Known2, Depth + 1, Q);
@@ -1714,9 +1721,9 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
// Aligned pointers have trailing zeros - refine Known.Zero set
if (V->getType()->isPointerTy()) {
- unsigned Align = V->getPointerAlignment(Q.DL);
+ const MaybeAlign Align = V->getPointerAlignment(Q.DL);
if (Align)
- Known.Zero.setLowBits(countTrailingZeros(Align));
+ Known.Zero.setLowBits(countTrailingZeros(Align->value()));
}
// computeKnownBitsFromAssume strictly refines Known.
@@ -2066,7 +2073,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (const auto *Call = dyn_cast<CallBase>(V)) {
if (Call->isReturnNonNull())
return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
return isKnownNonZero(RP, Depth, Q);
}
}
@@ -2300,7 +2307,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
cast<Operator>(Select)->getOpcode() == Instruction::Select &&
"Input should be a Select!");
- const Value *LHS, *RHS, *LHS2, *RHS2;
+ const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
if (SPF != SPF_SMAX && SPF != SPF_SMIN)
return false;
@@ -2308,6 +2315,7 @@ static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
if (!match(RHS, m_APInt(CLow)))
return false;
+ const Value *LHS2 = nullptr, *RHS2 = nullptr;
SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
if (getInverseMinMaxFlavor(SPF) != SPF2)
return false;
@@ -2384,253 +2392,256 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
if (Depth == MaxDepth)
return 1; // Limit search depth.
- const Operator *U = dyn_cast<Operator>(V);
- switch (Operator::getOpcode(V)) {
- default: break;
- case Instruction::SExt:
- Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
+ if (auto *U = dyn_cast<Operator>(V)) {
+ switch (Operator::getOpcode(V)) {
+ default: break;
+ case Instruction::SExt:
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
- case Instruction::SDiv: {
- const APInt *Denominator;
- // sdiv X, C -> adds log(C) sign bits.
- if (match(U->getOperand(1), m_APInt(Denominator))) {
+ case Instruction::SDiv: {
+ const APInt *Denominator;
+ // sdiv X, C -> adds log(C) sign bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
- // Ignore non-positive denominator.
- if (!Denominator->isStrictlyPositive())
- break;
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
- // Calculate the incoming numerator bits.
- unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ // Calculate the incoming numerator bits.
+ unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- // Add floor(log(C)) bits to the numerator bits.
- return std::min(TyBits, NumBits + Denominator->logBase2());
+ // Add floor(log(C)) bits to the numerator bits.
+ return std::min(TyBits, NumBits + Denominator->logBase2());
+ }
+ break;
}
- break;
- }
-
- case Instruction::SRem: {
- const APInt *Denominator;
- // srem X, C -> we know that the result is within [-C+1,C) when C is a
- // positive constant. This let us put a lower bound on the number of sign
- // bits.
- if (match(U->getOperand(1), m_APInt(Denominator))) {
-
- // Ignore non-positive denominator.
- if (!Denominator->isStrictlyPositive())
- break;
- // Calculate the incoming numerator bits. SRem by a positive constant
- // can't lower the number of sign bits.
- unsigned NumrBits =
- ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ case Instruction::SRem: {
+ const APInt *Denominator;
+ // srem X, C -> we know that the result is within [-C+1,C) when C is a
+ // positive constant. This lets us put a lower bound on the number of sign
+ // bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
- // Calculate the leading sign bit constraints by examining the
- // denominator. Given that the denominator is positive, there are two
- // cases:
- //
- // 1. the numerator is positive. The result range is [0,C) and [0,C) u<
- // (1 << ceilLogBase2(C)).
- //
- // 2. the numerator is negative. Then the result range is (-C,0] and
- // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
- //
- // Thus a lower bound on the number of sign bits is `TyBits -
- // ceilLogBase2(C)`.
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
- unsigned ResBits = TyBits - Denominator->ceilLogBase2();
- return std::max(NumrBits, ResBits);
+ // Calculate the incoming numerator bits. SRem by a positive constant
+ // can't lower the number of sign bits.
+ unsigned NumrBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ // Calculate the leading sign bit constraints by examining the
+ // denominator. Given that the denominator is positive, there are two
+ // cases:
+ //
+ // 1. the numerator is positive. The result range is [0,C) and [0,C) u<
+ // (1 << ceilLogBase2(C)).
+ //
+ // 2. the numerator is negative. Then the result range is (-C,0] and
+ // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
+ //
+ // Thus a lower bound on the number of sign bits is `TyBits -
+ // ceilLogBase2(C)`.
+
+ unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+ return std::max(NumrBits, ResBits);
+ }
+ break;
}
- break;
- }
- case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- // ashr X, C -> adds C sign bits. Vectors too.
- const APInt *ShAmt;
- if (match(U->getOperand(1), m_APInt(ShAmt))) {
- if (ShAmt->uge(TyBits))
- break; // Bad shift.
- unsigned ShAmtLimited = ShAmt->getZExtValue();
- Tmp += ShAmtLimited;
- if (Tmp > TyBits) Tmp = TyBits;
- }
- return Tmp;
- }
- case Instruction::Shl: {
- const APInt *ShAmt;
- if (match(U->getOperand(1), m_APInt(ShAmt))) {
- // shl destroys sign bits.
+ case Instruction::AShr: {
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (ShAmt->uge(TyBits) || // Bad shift.
- ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
- Tmp2 = ShAmt->getZExtValue();
- return Tmp - Tmp2;
+ // ashr X, C -> adds C sign bits. Vectors too.
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ if (ShAmt->uge(TyBits))
+ break; // Bad shift.
+ unsigned ShAmtLimited = ShAmt->getZExtValue();
+ Tmp += ShAmtLimited;
+ if (Tmp > TyBits) Tmp = TyBits;
+ }
+ return Tmp;
}
- break;
- }
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: // NOT is handled here.
- // Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- FirstAnswer = std::min(Tmp, Tmp2);
- // We computed what we know about the sign bits as our first
- // answer. Now proceed to the generic code that uses
- // computeKnownBits, and pick whichever answer is better.
+ case Instruction::Shl: {
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (ShAmt->uge(TyBits) || // Bad shift.
+ ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
+ Tmp2 = ShAmt->getZExtValue();
+ return Tmp - Tmp2;
+ }
+ break;
}
- break;
-
- case Instruction::Select: {
- // If we have a clamp pattern, we know that the number of sign bits will be
- // the minimum of the clamp min/max range.
- const Value *X;
- const APInt *CLow, *CHigh;
- if (isSignedMinMaxClamp(U, X, CLow, CHigh))
- return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
-
- Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp == 1) break;
- Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
- return std::min(Tmp, Tmp2);
- }
-
- case Instruction::Add:
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp == 1) break;
-
- // Special case decrementing a value (ADD X, -1):
- if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
- if (CRHS->isAllOnesValue()) {
- KnownBits Known(TyBits);
- computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
-
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
- return TyBits;
-
- // If we are subtracting one from a positive number, there is no carry
- // out of the result.
- if (Known.isNonNegative())
- return Tmp;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // computeKnownBits, and pick whichever answer is better.
}
+ break;
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp2 == 1) break;
- return std::min(Tmp, Tmp2)-1;
+ case Instruction::Select: {
+ // If we have a clamp pattern, we know that the number of sign bits will
+ // be the minimum of the clamp min/max range.
+ const Value *X;
+ const APInt *CLow, *CHigh;
+ if (isSignedMinMaxClamp(U, X, CLow, CHigh))
+ return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+
+ Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (Tmp == 1) break;
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
+ return std::min(Tmp, Tmp2);
+ }
- case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (Tmp2 == 1) break;
-
- // Handle NEG.
- if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
- if (CLHS->isNullValue()) {
- KnownBits Known(TyBits);
- computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
- // If the input is known to be 0 or 1, the output is 0/-1, which is all
- // sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
- return TyBits;
-
- // If the input is known to be positive (the sign bit is known clear),
- // the output of the NEG has the same number of sign bits as the input.
- if (Known.isNonNegative())
- return Tmp2;
-
- // Otherwise, we treat this like a SUB.
- }
+ case Instruction::Add:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp == 1) break;
+
+ // Special case decrementing a value (ADD X, -1):
+ if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is
+ // all sign bits set.
+ if ((Known.Zero | 1).isAllOnesValue())
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (Known.isNonNegative())
+ return Tmp;
+ }
- // Sub can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (Tmp == 1) break;
- return std::min(Tmp, Tmp2)-1;
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (Tmp2 == 1) break;
+ return std::min(Tmp, Tmp2) - 1;
- case Instruction::Mul: {
- // The output of the Mul can be at most twice the valid bits in the inputs.
- unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
- if (SignBitsOp0 == 1) break;
- unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
- if (SignBitsOp1 == 1) break;
- unsigned OutValidBits =
- (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
- return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
- }
+ case Instruction::Sub:
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (Tmp2 == 1) break;
+
+ // Handle NEG.
+ if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
+ if (CLHS->isNullValue()) {
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is
+ // all sign bits set.
+ if ((Known.Zero | 1).isAllOnesValue())
+ return TyBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the
+ // input.
+ if (Known.isNonNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
- case Instruction::PHI: {
- const PHINode *PN = cast<PHINode>(U);
- unsigned NumIncomingValues = PN->getNumIncomingValues();
- // Don't analyze large in-degree PHIs.
- if (NumIncomingValues > 4) break;
- // Unreachable blocks may have zero-operand PHI nodes.
- if (NumIncomingValues == 0) break;
-
- // Take the minimum of all incoming values. This can't infinitely loop
- // because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
- for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
- if (Tmp == 1) return Tmp;
- Tmp = std::min(
- Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (Tmp == 1) break;
+ return std::min(Tmp, Tmp2) - 1;
+
+ case Instruction::Mul: {
+ // The output of the Mul can be at most twice the valid bits in the
+ // inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+ if (SignBitsOp0 == 1) break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+ if (SignBitsOp1 == 1) break;
+ unsigned OutValidBits =
+ (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
+ return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
}
- return Tmp;
- }
- case Instruction::Trunc:
- // FIXME: it's tricky to do anything useful for this, but it is an important
- // case for targets like X86.
- break;
+ case Instruction::PHI: {
+ const PHINode *PN = cast<PHINode>(U);
+ unsigned NumIncomingValues = PN->getNumIncomingValues();
+ // Don't analyze large in-degree PHIs.
+ if (NumIncomingValues > 4) break;
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (NumIncomingValues == 0) break;
+
+ // Take the minimum of all incoming values. This can't infinitely loop
+ // because of our depth threshold.
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
+ for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
+ if (Tmp == 1) return Tmp;
+ Tmp = std::min(
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
+ }
+ return Tmp;
+ }
- case Instruction::ExtractElement:
- // Look through extract element. At the moment we keep this simple and skip
- // tracking the specific element. But at least we might find information
- // valid for all elements of the vector (for example if vector is sign
- // extended, shifted, etc).
- return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
-
- case Instruction::ShuffleVector: {
- // TODO: This is copied almost directly from the SelectionDAG version of
- // ComputeNumSignBits. It would be better if we could share common
- // code. If not, make sure that changes are translated to the DAG.
-
- // Collect the minimum number of sign bits that are shared by every vector
- // element referenced by the shuffle.
- auto *Shuf = cast<ShuffleVectorInst>(U);
- int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
- int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
- for (int i = 0; i != NumMaskElts; ++i) {
- int M = Shuf->getMaskValue(i);
- assert(M < NumElts * 2 && "Invalid shuffle mask constant");
- // For undef elements, we don't know anything about the common state of
- // the shuffle result.
- if (M == -1)
- return 1;
- if (M < NumElts)
- DemandedLHS.setBit(M % NumElts);
- else
- DemandedRHS.setBit(M % NumElts);
+ case Instruction::Trunc:
+ // FIXME: it's tricky to do anything useful for this, but it is an
+ // important case for targets like X86.
+ break;
+
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and
+ // skip tracking the specific element. But at least we might find
+ // information valid for all elements of the vector (for example if vector
+ // is sign extended, shifted, etc).
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ case Instruction::ShuffleVector: {
+ // TODO: This is copied almost directly from the SelectionDAG version of
+ // ComputeNumSignBits. It would be better if we could share common
+ // code. If not, make sure that changes are translated to the DAG.
+
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ auto *Shuf = cast<ShuffleVectorInst>(U);
+ int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
+ int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Shuf->getMaskValue(i);
+ assert(M < NumElts * 2 && "Invalid shuffle mask constant");
+ // For undef elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M == -1)
+ return 1;
+ if (M < NumElts)
+ DemandedLHS.setBit(M % NumElts);
+ else
+ DemandedRHS.setBit(M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits
+ // fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= V->getType()->getScalarSizeInBits() &&
+ "Failed to determine minimum sign bits");
+ return Tmp;
}
- Tmp = std::numeric_limits<unsigned>::max();
- if (!!DemandedLHS)
- Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
- if (!!DemandedRHS) {
- Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
- Tmp = std::min(Tmp, Tmp2);
}
- // If we don't know anything, early out and try computeKnownBits fall-back.
- if (Tmp == 1)
- break;
- assert(Tmp <= V->getType()->getScalarSizeInBits() &&
- "Failed to determine minimum sign bits");
- return Tmp;
- }
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2655,8 +2666,6 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
/// through SExt instructions only if LookThroughSExt is true.
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool LookThroughSExt, unsigned Depth) {
- const unsigned MaxDepth = 6;
-
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
@@ -3651,23 +3660,28 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
return Len == ~0ULL ? 1 : Len;
}
-const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
+const Value *
+llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
+ bool MustPreserveNullness) {
assert(Call &&
"getArgumentAliasingToReturnedPointer only works on nonnull calls");
if (const Value *RV = Call->getReturnedArgOperand())
return RV;
 // This can be used only as an aliasing property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
+ Call, MustPreserveNullness))
return Call->getArgOperand(0);
return nullptr;
}
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- const CallBase *Call) {
+ const CallBase *Call, bool MustPreserveNullness) {
return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
- Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
+ Call->getIntrinsicID() == Intrinsic::aarch64_tagp ||
+ (!MustPreserveNullness &&
+ Call->getIntrinsicID() == Intrinsic::ptrmask);
}
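+// Editorial note with a hypothetical IR snippet: llvm.ptrmask may map a
+// nonnull pointer to null, which is why it is looked through only when
+// MustPreserveNullness is false:
+//   %p = call i8* @llvm.ptrmask.p0i8.i64(i8* nonnull %q, i64 0)
+//   ; %p aliases %q for AA purposes, yet %p is null while %q is not.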
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3725,7 +3739,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
V = RP;
continue;
}
@@ -3865,6 +3879,18 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
return true;
}
+bool llvm::mustSuppressSpeculation(const LoadInst &LI) {
+ if (!LI.isUnordered())
+ return true;
+ const Function &F = *LI.getFunction();
+ // Speculative load may create a race that did not exist in the source.
+ return F.hasFnAttribute(Attribute::SanitizeThread) ||
+ // Speculative load may load data from dirty regions.
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress);
+}
+
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
@@ -3909,17 +3935,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(Inst);
- if (!LI->isUnordered() ||
- // Speculative load may create a race that did not exist in the source.
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
- // Speculative load may load data from dirty regions.
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
- LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
+ if (mustSuppressSpeculation(*LI))
return false;
const DataLayout &DL = LI->getModule()->getDataLayout();
- return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
- LI->getType(), LI->getAlignment(),
- DL, CtxI, DT);
+ return isDereferenceableAndAlignedPointer(
+ LI->getPointerOperand(), LI->getType(), MaybeAlign(LI->getAlignment()),
+ DL, CtxI, DT);
}
case Instruction::Call: {
auto *CI = cast<const CallInst>(Inst);
@@ -4221,22 +4242,9 @@ OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
}
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
- // A memory operation returns normally if it isn't volatile. A volatile
- // operation is allowed to trap.
- //
- // An atomic operation isn't guaranteed to return in a reasonable amount of
- // time because it's possible for another thread to interfere with it for an
+ // Note: An atomic operation isn't guaranteed to return in a reasonable amount
+ // of time because it's possible for another thread to interfere with it for an
// arbitrary length of time, but programs aren't allowed to rely on that.
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return !LI->isVolatile();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
- return !SI->isVolatile();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
- return !CXI->isVolatile();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
- return !RMWI->isVolatile();
- if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
- return !MII->isVolatile();
// If there is no successor, then execution can't transfer to it.
if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
@@ -4277,10 +4285,7 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
// FIXME: This isn't aggressive enough; a call which only writes to a global
// is guaranteed to return.
- return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
- match(I, m_Intrinsic<Intrinsic::assume>()) ||
- match(I, m_Intrinsic<Intrinsic::sideeffect>()) ||
- match(I, m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+ return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory();
}
// Other instructions return normally.
@@ -4572,12 +4577,12 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
// TODO: Allow FP min/max with nnan/nsz.
assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
- Value *A, *B;
+ Value *A = nullptr, *B = nullptr;
SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
if (!SelectPatternResult::isMinOrMax(L.Flavor))
return {SPF_UNKNOWN, SPNB_NA, false};
- Value *C, *D;
+ Value *C = nullptr, *D = nullptr;
SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
if (L.Flavor != R.Flavor)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -5627,8 +5632,8 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
}
static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
- APInt &Upper) {
- const Value *LHS, *RHS;
+ APInt &Upper, const InstrInfoQuery &IIQ) {
+ const Value *LHS = nullptr, *RHS = nullptr;
SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
if (R.Flavor == SPF_UNKNOWN)
return;
@@ -5640,7 +5645,8 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
// then the result of abs(X) is [0..SIGNED_MAX],
// otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
Lower = APInt::getNullValue(BitWidth);
- if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ if (match(RHS, m_Neg(m_Specific(LHS))) &&
+ IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
Upper = APInt::getSignedMaxValue(BitWidth) + 1;
else
Upper = APInt::getSignedMinValue(BitWidth) + 1;
@@ -5694,7 +5700,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
else if (auto *II = dyn_cast<IntrinsicInst>(V))
setLimitsForIntrinsic(*II, Lower, Upper);
else if (auto *SI = dyn_cast<SelectInst>(V))
- setLimitsForSelectPattern(*SI, Lower, Upper);
+ setLimitsForSelectPattern(*SI, Lower, Upper, IIQ);
ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
@@ -5704,3 +5710,111 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) {
return CR;
}
+
+static Optional<int64_t>
+getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!OpC)
+ return None;
+ if (OpC->isZero())
+ continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
+ Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or vector. Multiply
+ // the index by the ElementSize.
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
+ Offset += Size * OpC->getSExtValue();
+ }
+
+ return Offset;
+}
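+
+// Editorial example with hypothetical IR: for
+//   %g = getelementptr { i32, [4 x i16] }, { i32, [4 x i16] }* %p,
+//        i64 0, i32 1, i64 3
+// getOffsetFromIndex(%g, /*Idx=*/1, DL) accumulates 0 (leading zero index)
+// + 4 (offset of field 1) + 3 * 2 (i16 elements) = 10 on a typical layout.
+// Any non-constant index makes the result None.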
+
+Optional<int64_t> llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2,
+ const DataLayout &DL) {
+ Ptr1 = Ptr1->stripPointerCasts();
+ Ptr2 = Ptr2->stripPointerCasts();
+
+ // Handle the trivial case first.
+ if (Ptr1 == Ptr2) {
+ return 0;
+ }
+
+ const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
+ const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
+
+ // If one pointer is a GEP, see if the GEP is a constant offset from the
+ // base, as in "P" and "gep P, 1".
+ // Also do this iteratively to handle the following case:
+ // Ptr_t1 = GEP Ptr1, c1
+ // Ptr_t2 = GEP Ptr_t1, c2
+ // Ptr2 = GEP Ptr_t2, c3
+ // where we will return c1+c2+c3.
+ // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base
+ // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases
+ // are the same, and return the difference between offsets.
+ auto getOffsetFromBase = [&DL](const GEPOperator *GEP,
+ const Value *Ptr) -> Optional<int64_t> {
+ const GEPOperator *GEP_T = GEP;
+ int64_t OffsetVal = 0;
+ bool HasSameBase = false;
+ while (GEP_T) {
+ auto Offset = getOffsetFromIndex(GEP_T, 1, DL);
+ if (!Offset)
+ return None;
+ OffsetVal += *Offset;
+ auto Op0 = GEP_T->getOperand(0)->stripPointerCasts();
+ if (Op0 == Ptr) {
+ HasSameBase = true;
+ break;
+ }
+ GEP_T = dyn_cast<GEPOperator>(Op0);
+ }
+ if (!HasSameBase)
+ return None;
+ return OffsetVal;
+ };
+
+ if (GEP1) {
+ auto Offset = getOffsetFromBase(GEP1, Ptr2);
+ if (Offset)
+ return -*Offset;
+ }
+ if (GEP2) {
+ auto Offset = getOffsetFromBase(GEP2, Ptr1);
+ if (Offset)
+ return Offset;
+ }
+
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base. After that base, they may have some number of common (and
+ // potentially variable) indices. After those, the two GEPs may differ by a
+ // constant offset, which determines their distance from each other. We
+ // handle no other case.
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
+ return None;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL);
+ auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL);
+ if (!Offset1 || !Offset2)
+ return None;
+ return *Offset2 - *Offset1;
+}
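+
+// Editorial usage sketch with hypothetical IR values:
+//   %q = getelementptr i8, i8* %p, i64 4
+//   %r = getelementptr i8, i8* %q, i64 3
+// isPointerOffset(%p, %r, DL) walks %r's GEP chain back to %p and returns
+// 7; swapping the arguments returns -7; pointers with no common base give
+// None.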
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 986756eb2627..600f57ab9d71 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -56,6 +56,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat:
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
@@ -98,6 +99,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
+ case Intrinsic::umul_fix_sat:
return (ScalarOpdIdx == 2);
default:
return false;
@@ -830,15 +832,15 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ PointerType *PtrTy = cast<PointerType>(Ptr->getType());
uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
// An alignment of 0 means target ABI alignment.
- unsigned Align = getLoadStoreAlignment(&I);
- if (!Align)
- Align = DL.getABITypeAlignment(PtrTy->getElementType());
+ MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I));
+ if (!Alignment)
+ Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType()));
- AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment);
}
}
@@ -925,7 +927,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (!Group) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
<< '\n');
- Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
}
if (B->mayWriteToMemory())
StoreGroups.insert(Group);
@@ -964,6 +966,10 @@ void InterleavedAccessInfo::analyzeInterleaving(
// instructions that precede it.
if (isInterleaved(A)) {
InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+
+ LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
+ "dependence between " << *A << " and "<< *B << '\n');
+
StoreGroups.remove(StoreGroup);
releaseGroup(StoreGroup);
}
@@ -1028,7 +1034,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
// Try to insert A into B's group.
- if (Group->insertMember(A, IndexA, DesA.Align)) {
+ if (Group->insertMember(A, IndexA, DesA.Alignment)) {
LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
<< " into the interleave group with" << *B
<< '\n');